String

Pattern matching

# Search a DNA sequence for a short motif.
DNA_s <- "ATTTGCATGCAAAGGGCATGCAG"
Pattern <- "ATT"

# grepl() yields TRUE when the pattern occurs anywhere in the string.
Matches <- grepl(pattern = Pattern, x = DNA_s)
print(Matches)
[1] TRUE
# Look for a different motif in the same sequence.
Matches <- grepl(pattern = "ATTTTCGG", x = DNA_s)
print(Matches)
[1] FALSE

Split

# Break a comma-separated string into its individual gene names.
# strsplit() returns a list with one character vector per input string.
genes <- "gene1,gene2,gene3"
split_genes <- strsplit(x = genes, split = ",")
print(split_genes)
[[1]]
[1] "gene1" "gene2" "gene3"
# Split a sentence into words by using a single space as the separator.
paragraph <- "This paragraph will be split for spaces"
split_paragraph <- strsplit(x = paragraph, split = " ")
print(split_paragraph)
[[1]]
[1] "This"      "paragraph" "will"      "be"        "split"     "for"       "spaces"   
# Split text into sentences. The separator is a regular expression: "\\. "
# matches a literal period followed by a space, so the final period (which has
# no space after it) stays attached to the last piece.
sentence <- "this is sentence 1. second sentence. third sentence."
split_sentence <- strsplit(x = sentence, split = "\\. ")
print(split_sentence)
[[1]]
[1] "this is sentence 1" "second sentence"    "third sentence."   

Reverse

# Reverse the characters of a string using stri_reverse() from stringi.
library(stringi)

s1 <- "ABCDEFG"
rev_s1 <- stri_reverse(s1)
print(rev_s1)
[1] "GFEDCBA"

Number of occurrences

# Find every start position of a motif inside a DNA sequence.
DNA_s <- "ATTTGCATGATTCAAAGGGCATGCAG"
Pattern <- "ATT"

# gregexpr() returns a list; unlist() flattens it into a plain vector of
# match start positions.
locations <- unlist(gregexpr(pattern = Pattern, text = DNA_s))
print(locations)
[1]  1 10
# Count the matches: every entry other than -1 is a real match position.
num_occ <- sum(locations != -1)
print(num_occ)
[1] 2

Vector

Create vector

# Build a numeric vector with c() and display it.
ages <- c(20, 25, 18, 65, 18, 45)
print(ages)
[1] 20 25 18 65 18 45

Indexing

ages[1]
[1] 20
ages[-1]
[1] 25 18 65 18 45
ages[70]
[1] NA

Append

# Add two more values to the end of the vector, then show the result.
ages <- append(ages, c(100, 25))
ages
[1]  20  25  18  65  18  45 100  25

Subset or slice

ages
[1]  20  25  18  65  18  45 100  25
print(ages[3:6])
[1] 18 65 18 45
print(ages[1:4])
[1] 20 25 18 65
print(ages[6:20])
 [1]  45 100  25  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA

Length

print(length(ages))
[1] 8

Insert in a specific position

print(ages)
[1]  20  25  18  65  18  45 100  25
print(ages[1:2])
[1] 20 25
# Insert a new value after the second element. append() places `values`
# immediately after index `after` and keeps the rest of the vector in order.
# (The original line assigned the function `c` itself instead of a vector.)
ages_extended <- append(ages, 30, after = 2)

Vector Conditional

# Sample vector used for the conditional-subsetting examples.
age <- c(18, 20, 18, 40, 50, 60)
print(age)
[1] 18 20 18 40 50 60
# Keep only the elements that are at most 50 (logical-mask subsetting).
age_50 <- age[age <= 50]
age_50
[1] 18 20 18 40 50
# Combine two conditions element-wise with `&` to keep values in [25, 50].
age_25_50 <- age[age >= 25 & age <= 50]
age_25_50
[1] 40 50
age_25_50[1]
[1] 40
ages[ages >=25 & ages<=50]
[1] 25 45 25
ages
[1]  20  25  18  65  18  45 100  25

Sorting

# sort() orders ascending by default.
sorted_ages <- sort(ages)

# Passing decreasing = TRUE gives the values from largest to smallest.
sort(sorted_ages, decreasing = TRUE)
[1] 100  65  45  25  25  20  18  18

Simple Statistics

ages
[1]  20  25  18  65  18  45 100  25
max(ages)
[1] 100
max(ages[2:5])
[1] 65
min(ages)
[1] 18
mean(ages)
[1] 39.5
median(ages)
[1] 25
sum(ages)
[1] 316
sd(ages)
[1] 29.43759
var(ages)
[1] 866.5714
quantile(ages)
   0%   25%   50%   75%  100% 
 18.0  19.5  25.0  50.0 100.0 
# NOTE: mode() reports the storage mode of the object ("numeric" here); it is
# not the statistical mode (most frequent value).
mode(ages)
[1] "numeric"
length(ages)
[1] 8
# A second vector with the same number of elements as `ages`, created so the
# two can be compared element by element.
scores <- c(10, 20, 30, 40, 50, 60, 70, 80)

length(scores)
[1] 8
cor(ages, scores)
[1] 0.4398222

Data frame

# Build a small table of exam results: one row per student, one column per
# attribute.
exam_score <- data.frame(
  ID    = c(1, 2, 3, 4, 5),
  Names = c("Amzad", "Kalam", "Subhan", "Chang", "Bran"),
  Age   = c(20, 22, 23, 24, 25),
  Score = c(100, 92, 80, 60, 55)
)

print(exam_score)
# Data frames are indexed as [rows, columns].
# Rows 1 to 3, columns 3 to 4 (Age and Score).
exam_score[1:3, 3:4]
# Row 2 only, columns 3 to 4.
exam_score[2, 3:4]

# Leaving the column index empty selects every column of row 2.
exam_score[2, ]
# Leaving the row index empty selects every row of column 4 (Score);
# a single column comes back as a plain vector.
exam_score[ , 4]
[1] 100  92  80  60  55
exam_score[3:4, 1]
[1] 3 4
exam_score[ , 2]
[1] "Amzad"  "Kalam"  "Subhan" "Chang"  "Bran"  

Make a table with only name and age

exam_score$Names
[1] "Amzad"  "Kalam"  "Subhan" "Chang"  "Bran"  
exam_score$Age
[1] 20 22 23 24 25
# Assemble a new two-column table from the Names and Age columns.
selected_columns <- data.frame(
  Names = exam_score[["Names"]],
  Age   = exam_score[["Age"]]
)

print(selected_columns)

Alternative way

# Same selection by position: rows 1 to 5, columns 2 and 3 (Names and Age).
exam_score[1:5, 2:3]
# Rows whose Score is at least 80 and below 95, with all columns.
exam_score[exam_score$Score >= 80 & exam_score$Score < 95, ]

When you need to select the rows whose ID belongs to a given set of values, use %in%. The example below selects only the rows with ID 1 or 5.

# Keep the rows whose ID is one of the listed values.
selected_ID <- c(1, 5)
exam_score[exam_score$ID %in% selected_ID, ]

# The same technique applied to the Age column.
selected_Age <- c(20, 22)
exam_score[exam_score$Age %in% selected_Age, ]

Adding new rows and columns

# Show the table before adding a record.
exam_score

# Build the new record as a one-row data frame so each value keeps its own
# type. c(10, "Sam", 55, 100) would coerce every value to character, and
# rbind() would then turn the ID, Age and Score columns into character columns.
new_row <- data.frame(ID = 10, Names = "Sam", Age = 55, Score = 100)

# rbind() stacks the new row underneath the existing rows, matching by column
# name.
exam_score2 <- rbind(exam_score, new_row)

exam_score2


# Add a Location column; the vector supplies six values, one per row.
Location <- c("dhk", "ctg", "bars", "jfk", "mms", "cml")
exam_score3 <- cbind(exam_score2, Location)
exam_score3
LS0tDQp0aXRsZTogIkxlY3R1cmUgMiINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCiMgU3RyaW5nDQojIyBQYXR0ZXJuIG1hdGNoaW5nDQoNCmBgYHtyfQ0KRE5BX3M9ICJBVFRUR0NBVEdDQUFBR0dHQ0FUR0NBRyINClBhdHRlcm49ICJBVFQiDQoNCk1hdGNoZXMgPSBncmVwbChQYXR0ZXJuLCBETkFfcykNCnByaW50KE1hdGNoZXMpDQoNCk1hdGNoZXMgPSBncmVwbCgiQVRUVFRDR0ciLCBETkFfcykNCnByaW50KE1hdGNoZXMpDQpgYGANCg0KIyMgU3BsaXQNCmBgYHtyfQ0KZ2VuZXMgPSJnZW5lMSxnZW5lMixnZW5lMyINCnNwbGl0X2dlbmVzPXN0cnNwbGl0KGdlbmVzLCAiLCIpDQpwcmludChzcGxpdF9nZW5lcykNCg0KDQpwYXJhZ3JhcGggPSAiVGhpcyBwYXJhZ3JhcGggd2lsbCBiZSBzcGxpdCBmb3Igc3BhY2VzIg0Kc3BsaXRfcGFyYWdyYXBoPXN0cnNwbGl0KHBhcmFncmFwaCwgIiAiKQ0KcHJpbnQoc3BsaXRfcGFyYWdyYXBoKQ0KDQpzZW50ZW5jZT0gInRoaXMgaXMgc2VudGVuY2UgMS4gc2Vjb25kIHNlbnRlbmNlLiB0aGlyZCBzZW50ZW5jZS4iDQpzcGxpdF9zZW50ZW5jZT1zdHJzcGxpdChzZW50ZW5jZSwgIlxcLiAiKQ0KcHJpbnQoc3BsaXRfc2VudGVuY2UpDQpgYGANCg0KDQojIyBSZXZlcnNlDQpgYGB7cn0NCmxpYnJhcnkoc3RyaW5naSkNCnMxPSAiQUJDREVGRyINCnJldl9zMT0gc3RyaV9yZXZlcnNlKHMxKQ0KcHJpbnQocmV2X3MxKQ0KYGBgDQoNCg0KIyMgTnVtYmVyIG9mIG9jY3VyZW5jZXMNCmBgYHtyfQ0KRE5BX3M9ICJBVFRUR0NBVEdBVFRDQUFBR0dHQ0FUR0NBRyINClBhdHRlcm49ICJBVFQiDQoNCmxvY2F0aW9ucyA9IGdyZWdleHByKFBhdHRlcm4sIEROQV9zKQ0KbG9jYXRpb25zID11bmxpc3QobG9jYXRpb25zKQ0KcHJpbnQobG9jYXRpb25zKQ0KDQpudW1fb2NjPSBzdW0obG9jYXRpb25zICE9LTEpDQpwcmludChudW1fb2NjKQ0KYGBgDQoNCiMgVmVjdG9yDQoNCiMjIENyZWF0ZSB2ZWN0b3INCmBgYHtyfQ0KYWdlcz0gYygyMCwgMjUsMTgsNjUsMTgsNDUpDQpwcmludChhZ2VzKQ0KDQoNCmBgYA0KDQojIyBJbmRleGluZw0KYGBge3J9DQphZ2VzWzFdDQphZ2VzWy0xXQ0KYWdlc1s3MF0NCmBgYA0KDQojIyBBcHBlbmQNCmBgYHtyfQ0KYWdlcz0gYyhhZ2VzLCAxMDAsIDI1KQ0KYWdlcw0KYGBgDQoNCg0KIyMgU3Vic2V0IG9yIHNsaWNlDQpgYGB7cn0NCmFnZXMNCnByaW50KGFnZXNbMzo2XSkNCnByaW50KGFnZXNbMTo0XSkNCnByaW50KGFnZXNbNjoyMF0pDQpgYGANCiMjIExlbmd0aA0KYGBge3J9DQpwcmludChsZW5ndGgoYWdlcykpDQpgYGANCg0KIyMgSW5zZXJ0IGluIGEgc3BlY2lmaWMgcG9zaXRpb24NCmBgYHtyfQ0KcHJpbnQoYWdlcykNCnByaW50KGFnZXNbMToyXSkNCmFnZXNfZXh0ZW5kZWQ9Yw0KYGBgDQoNCiMjIFZlY3RvciBDb25kaXRpb25hbA0KYGBge3J9DQphZ2U9IGMoMTgsMjAsMTgsNDAsNTAsNjApDQpwcmludChhZ2UpDQoNCmFnZV81MD1hZ2VbYWdlPD01MF0NCmFnZV81
MA0KDQphZ2VfMjVfNTA9YWdlW2FnZT49MjUgJiBhZ2U8PTUwXQ0KYWdlXzI1XzUwDQoNCmFnZV8yNV81MFsxXQ0KDQpgYGANCg0KYGBge3J9DQphZ2VzW2FnZXMgPj0yNSAmIGFnZXM8PTUwXQ0KDQpgYGANCg0KIyMgU29ydGluZw0KDQpgYGB7cn0NCnNvcnRlZF9hZ2VzPXNvcnQoYWdlcykNCg0KDQpzb3J0KHNvcnRlZF9hZ2VzLCBUUlVFKQ0KDQpgYGANCg0KDQojIFNpbXBsZSBTdGF0aXN0aWNzDQoNCmBgYHtyfQ0KYWdlcw0KbWF4KGFnZXMpDQptYXgoYWdlc1syOjVdKQ0KbWluKGFnZXMpDQptZWFuKGFnZXMpDQptZWRpYW4oYWdlcykNCnN1bShhZ2VzKQ0Kc2QoYWdlcykNCnZhcihhZ2VzKQ0KcXVhbnRpbGUoYWdlcykNCmBgYA0KDQoNCmBgYHtyfQ0KbGVuZ3RoKGFnZXMpDQpzY29yZXM9IGMoMTAsIDIwLCAzMCw0MCw1MCw2MCw3MCw4MCkNCg0KbGVuZ3RoKHNjb3JlcykNCg0KY29yKGFnZXMsIHNjb3JlcykNCg0KYGBgDQoNCiMgRGF0YSBmcmFtZQ0KDQpgYGB7cn0NCmV4YW1fc2NvcmUgPSBkYXRhLmZyYW1lKA0KICBJRD0gYygxLCAyLCAzLCA0LCA1KSwNCiAgTmFtZXM9IGMoIkFtemFkIiwgIkthbGFtIiwgIlN1YmhhbiIsICJDaGFuZyIsICJCcmFuIiksDQogIEFnZT0gYygyMCwyMiwyMywyNCwyNSksDQogIFNjb3JlPSBjKDEwMCwgOTIsIDgwLDYwLDU1KQ0KKQ0KDQpwcmludChleGFtX3Njb3JlKQ0KYGBgDQoNCmBgYHtyfQ0KZXhhbV9zY29yZVsxOjMsIDM6NF0NCmV4YW1fc2NvcmVbMiwgMzo0XQ0KDQpleGFtX3Njb3JlWzIsIF0NCmV4YW1fc2NvcmVbICwgNF0NCg0KYGBgDQoNCmBgYHtyfQ0KZXhhbV9zY29yZVszOjQsIDFdDQpleGFtX3Njb3JlWyAsIDJdDQpgYGANCiMjIG1ha2UgdGFibGUgd2l0aCBvbmx5IG5hbWUgYW5kIGFnZQ0KYGBge3J9DQpleGFtX3Njb3JlJE5hbWVzDQpleGFtX3Njb3JlJEFnZQ0KDQpzZWxlY3RlZF9jb2x1bW5zPSBkYXRhLmZyYW1lKA0KICBOYW1lcz0gZXhhbV9zY29yZSROYW1lcywNCiAgQWdlPWV4YW1fc2NvcmUkQWdlDQopDQoNCnByaW50KHNlbGVjdGVkX2NvbHVtbiBzKQ0KYGBgDQojIyMgQWx0ZXJuYXRpdmUgd2F5DQpgYGB7cn0NCmV4YW1fc2NvcmVbYygxLDIsMyw0LDUpLCBjKDIsMyldDQpgYGANCg0KYGBge3J9DQpleGFtX3Njb3JlW2V4YW1fc2NvcmUkU2NvcmU+PTgwICYgZXhhbV9zY29yZSRTY29yZTw5NSwgXQ0KYGBgDQoNCg0KIyMjIHdoZW4geW91IGhhdmUgdG8gc2VsZWN0IHNwZWNpZmljIElEIGNvbnRhaW5pbmcgcm93cywgdXNlICVpbiUgLiB0aGlzIG9wZXJhdGlvbiB3aWxsIHNlbGVjdCBvbmx5IDEsNSBpbiBJRC4NCmBgYHtyfQ0Kc2VsZWN0ZWRfSUQ9IGMoMSw1KQ0KZXhhbV9zY29yZVtleGFtX3Njb3JlJElEICVpbiUgc2VsZWN0ZWRfSUQsIF0NCmBgYA0KDQpgYGB7cn0NCnNlbGVjdGVkX0FnZT0gYygyMCwyMikNCmV4YW1fc2NvcmVbZXhhbV9zY29yZSRBZ2UgJWluJSBzZWxlY3RlZF9BZ2UsIF0NCmBgYA0KDQoNCiMjIEFkZGluZyBuZXcgcm93cyBhbmQgY29sdW1uc3EN
Cg0KDQpgYGB7cn0NCmV4YW1fc2NvcmUNCm5ld19yb3c9IGMoMTAsICJTYW0iLCA1NSwgMTAwKQ0KDQpleGFtX3Njb3JlMj0gcmJpbmQoZXhhbV9zY29yZSwgbmV3X3JvdykNCg0KZXhhbV9zY29yZTINCg0KDQpMb2NhdGlvbiA9IGMoImRoayIsICJjdGciLCAiYmFycyIsICJqZmsiLCAibW1zIiwgImNtbCIpDQpleGFtX3Njb3JlMz0gY2JpbmQoZXhhbV9zY29yZTIsIExvY2F0aW9uKQ0KZXhhbV9zY29yZTMNCmBgYA0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg==