String
Pattern matching
# Check whether a short motif occurs anywhere in a DNA sequence.
DNA_s <- "ATTTGCATGCAAAGGGCATGCAG"
Pattern <- "ATT"

# grepl() yields TRUE when the pattern is found in the string.
Matches <- grepl(pattern = Pattern, x = DNA_s)
print(Matches)
[1] TRUE
# A motif that does not occur in DNA_s gives FALSE.
Matches <- grepl(pattern = "ATTTTCGG", x = DNA_s)
print(Matches)
[1] FALSE
Split
# Break a comma-separated string into its fields.
# strsplit() returns a list holding one character vector per input string.
genes <- "gene1,gene2,gene3"
split_genes <- strsplit(x = genes, split = ",")
print(split_genes)
[[1]]
[1] "gene1" "gene2" "gene3"
# Tokenise a sentence into words by splitting on single spaces.
paragraph <- "This paragraph will be split for spaces"
split_paragraph <- strsplit(x = paragraph, split = " ")
print(split_paragraph)
[[1]]
[1] "This" "paragraph" "will" "be" "split" "for" "spaces"
# Split text into sentences on "period followed by a space".
# The period is escaped because strsplit() treats the pattern as a regular
# expression. The final piece keeps its trailing period, since no space
# follows it.
sentence <- "this is sentence 1. second sentence. third sentence."
split_sentence <- strsplit(x = sentence, split = "\\. ")
print(split_sentence)
[[1]]
[1] "this is sentence 1" "second sentence" "third sentence."
Reverse
# Reverse the characters of a string with stringi.
library(stringi)

s1 <- "ABCDEFG"
rev_s1 <- stri_reverse(str = s1)
print(rev_s1)
[1] "GFEDCBA"
Number of occurrences
# Locate every occurrence of a motif in a DNA sequence.
DNA_s <- "ATTTGCATGATTCAAAGGGCATGCAG"
Pattern <- "ATT"

# gregexpr() returns a list of match start positions; unlist() flattens it
# into a plain vector.
locations <- unlist(gregexpr(pattern = Pattern, text = DNA_s))
print(locations)
[1] 1 10
# Count the matches. gregexpr() reports "no match" as -1, so that sentinel is
# excluded from the tally.
num_occ <- sum(locations != -1)
print(num_occ)
[1] 2
Vector
Create vector
# Numeric vector of ages used throughout the vector examples.
ages <- c(20, 25, 18, 65, 18, 45)
print(ages)
[1] 20 25 18 65 18 45
Indexing
ages[1]
[1] 20
ages[-1]
[1] 25 18 65 18 45
ages[70]
[1] NA
Append
# Append two values to the end of the vector by combining it with them.
ages <- c(ages, 100, 25)
ages
[1] 20 25 18 65 18 45 100 25
Subset or slice
ages
[1] 20 25 18 65 18 45 100 25
print(ages[3:6])
[1] 18 65 18 45
print(ages[1:4])
[1] 20 25 18 65
print(ages[6:20])
[1] 45 100 25 NA NA NA NA NA NA NA NA NA NA NA NA
Length
print(length(ages))
[1] 8
Insert in a specific position
print(ages)
[1] 20 25 18 65 18 45 100 25
print(ages[1:2])
[1] 20 25
# Insert a value after the second element, leaving `ages` itself unchanged.
# append(x, values, after) returns a copy of x with `values` placed right
# after position `after`.
# NOTE(review): the original statement stopped at `c`, which merely aliased the
# c() function. The inserted value 30 is a placeholder - confirm the intended
# number.
ages_extended <- append(ages, 30, after = 2)
print(ages_extended)
Vector Conditional
# Separate sample vector used for the conditional-subsetting examples.
age <- c(18, 20, 18, 40, 50, 60)
print(age)
[1] 18 20 18 40 50 60
# Keep only the entries that are at most 50 (logical-mask subsetting).
age_50 <- age[age <= 50]
age_50
[1] 18 20 18 40 50
# Keep entries inside the closed range [25, 50]. The element-wise `&` combines
# the two masks.
age_25_50 <- age[age >= 25 & age <= 50]
age_25_50
[1] 40 50
age_25_50[1]
[1] 40
ages[ages >=25 & ages<=50]
[1] 25 45 25
ages
[1] 20 25 18 65 18 45 100 25
Sorting
# Ascending order is the default for sort().
sorted_ages <- sort(ages)

# Descending order. The flag is passed by name, because a bare positional TRUE
# does not show that it means `decreasing`.
sort(sorted_ages, decreasing = TRUE)
[1] 100 65 45 25 25 20 18 18
Simple Statistics
ages
[1] 20 25 18 65 18 45 100 25
max(ages)
[1] 100
max(ages[2:5])
[1] 65
min(ages)
[1] 18
mean(ages)
[1] 39.5
median(ages)
[1] 25
sum(ages)
[1] 316
sd(ages)
[1] 29.43759
var(ages)
[1] 866.5714
quantile(ages)
0% 25% 50% 75% 100%
18.0 19.5 25.0 50.0 100.0
# NOTE: mode() reports the storage mode of the object ("numeric" here), not the
# statistical mode. Base R has no built-in for the most frequent value; tabulate
# with table() if that is what is wanted.
mode(ages)
[1] "numeric"
length(ages)
[1] 8
# Eight scores, 10 through 80 in steps of 10, matching the length of `ages`.
scores <- seq(from = 10, to = 80, by = 10)
length(scores)
[1] 8
cor(ages, scores)
[1] 0.4398222
Data frame
# Exam results table: one row per student.
exam_score <- data.frame(
  ID    = c(1, 2, 3, 4, 5),
  Names = c("Amzad", "Kalam", "Subhan", "Chang", "Bran"),
  Age   = c(20, 22, 23, 24, 25),
  Score = c(100, 92, 80, 60, 55)
)
print(exam_score)

# Indexing is [rows, columns]; an empty slot selects everything on that axis.
exam_score[1:3, 3:4]  # rows 1-3, columns Age and Score
exam_score[2, 3:4]    # row 2, columns Age and Score
exam_score[2, ]       # all of row 2
exam_score[, 4]       # column 4 (Score), returned as a plain vector
[1] 100 92 80 60 55
exam_score[3:4, 1]
[1] 3 4
exam_score[ , 2]
[1] "Amzad" "Kalam" "Subhan" "Chang" "Bran"
Make a table with only name and age
exam_score$Names
[1] "Amzad" "Kalam" "Subhan" "Chang" "Bran"
exam_score$Age
[1] 20 22 23 24 25
# Assemble a two-column table from the Names and Age columns of exam_score.
selected_columns <- data.frame(
  Names = exam_score$Names,
  Age   = exam_score$Age
)
print(selected_columns)
Alternative way
# Same Names/Age table via positional indexing: rows 1-5, columns 2 and 3.
exam_score[c(1, 2, 3, 4, 5), c(2, 3)]

# Rows whose Score lies in [80, 95).
exam_score[exam_score$Score >= 80 & exam_score$Score < 95, ]
When you need to select the rows whose ID is in a given set, use %in%. This
operation will select only the rows with ID 1 or 5.
# Keep the rows whose ID appears in the wanted set.
selected_ID <- c(1, 5)
exam_score[exam_score$ID %in% selected_ID, ]
# Same membership filter, this time on the Age column.
selected_Age <- c(20, 22)
exam_score[exam_score$Age %in% selected_Age, ]
Adding new rows and columns
exam_score

# Build the new row as a one-row data frame so every column keeps its type.
# c(10, "Sam", 55, 100) would coerce all four values to character, and rbind()
# would then turn ID, Age and Score into character columns.
new_row <- data.frame(ID = 10, Names = "Sam", Age = 55, Score = 100)

exam_score2 <- rbind(exam_score, new_row)
exam_score2

# One location per row of exam_score2, added as a new column.
Location <- c("dhk", "ctg", "bars", "jfk", "mms", "cml")
exam_score3 <- cbind(exam_score2, Location)
exam_score3
LS0tDQp0aXRsZTogIkxlY3R1cmUgMiINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCiMgU3RyaW5nDQojIyBQYXR0ZXJuIG1hdGNoaW5nDQoNCmBgYHtyfQ0KRE5BX3M9ICJBVFRUR0NBVEdDQUFBR0dHQ0FUR0NBRyINClBhdHRlcm49ICJBVFQiDQoNCk1hdGNoZXMgPSBncmVwbChQYXR0ZXJuLCBETkFfcykNCnByaW50KE1hdGNoZXMpDQoNCk1hdGNoZXMgPSBncmVwbCgiQVRUVFRDR0ciLCBETkFfcykNCnByaW50KE1hdGNoZXMpDQpgYGANCg0KIyMgU3BsaXQNCmBgYHtyfQ0KZ2VuZXMgPSJnZW5lMSxnZW5lMixnZW5lMyINCnNwbGl0X2dlbmVzPXN0cnNwbGl0KGdlbmVzLCAiLCIpDQpwcmludChzcGxpdF9nZW5lcykNCg0KDQpwYXJhZ3JhcGggPSAiVGhpcyBwYXJhZ3JhcGggd2lsbCBiZSBzcGxpdCBmb3Igc3BhY2VzIg0Kc3BsaXRfcGFyYWdyYXBoPXN0cnNwbGl0KHBhcmFncmFwaCwgIiAiKQ0KcHJpbnQoc3BsaXRfcGFyYWdyYXBoKQ0KDQpzZW50ZW5jZT0gInRoaXMgaXMgc2VudGVuY2UgMS4gc2Vjb25kIHNlbnRlbmNlLiB0aGlyZCBzZW50ZW5jZS4iDQpzcGxpdF9zZW50ZW5jZT1zdHJzcGxpdChzZW50ZW5jZSwgIlxcLiAiKQ0KcHJpbnQoc3BsaXRfc2VudGVuY2UpDQpgYGANCg0KDQojIyBSZXZlcnNlDQpgYGB7cn0NCmxpYnJhcnkoc3RyaW5naSkNCnMxPSAiQUJDREVGRyINCnJldl9zMT0gc3RyaV9yZXZlcnNlKHMxKQ0KcHJpbnQocmV2X3MxKQ0KYGBgDQoNCg0KIyMgTnVtYmVyIG9mIG9jY3VyZW5jZXMNCmBgYHtyfQ0KRE5BX3M9ICJBVFRUR0NBVEdBVFRDQUFBR0dHQ0FUR0NBRyINClBhdHRlcm49ICJBVFQiDQoNCmxvY2F0aW9ucyA9IGdyZWdleHByKFBhdHRlcm4sIEROQV9zKQ0KbG9jYXRpb25zID11bmxpc3QobG9jYXRpb25zKQ0KcHJpbnQobG9jYXRpb25zKQ0KDQpudW1fb2NjPSBzdW0obG9jYXRpb25zICE9LTEpDQpwcmludChudW1fb2NjKQ0KYGBgDQoNCiMgVmVjdG9yDQoNCiMjIENyZWF0ZSB2ZWN0b3INCmBgYHtyfQ0KYWdlcz0gYygyMCwgMjUsMTgsNjUsMTgsNDUpDQpwcmludChhZ2VzKQ0KDQoNCmBgYA0KDQojIyBJbmRleGluZw0KYGBge3J9DQphZ2VzWzFdDQphZ2VzWy0xXQ0KYWdlc1s3MF0NCmBgYA0KDQojIyBBcHBlbmQNCmBgYHtyfQ0KYWdlcz0gYyhhZ2VzLCAxMDAsIDI1KQ0KYWdlcw0KYGBgDQoNCg0KIyMgU3Vic2V0IG9yIHNsaWNlDQpgYGB7cn0NCmFnZXMNCnByaW50KGFnZXNbMzo2XSkNCnByaW50KGFnZXNbMTo0XSkNCnByaW50KGFnZXNbNjoyMF0pDQpgYGANCiMjIExlbmd0aA0KYGBge3J9DQpwcmludChsZW5ndGgoYWdlcykpDQpgYGANCg0KIyMgSW5zZXJ0IGluIGEgc3BlY2lmaWMgcG9zaXRpb24NCmBgYHtyfQ0KcHJpbnQoYWdlcykNCnByaW50KGFnZXNbMToyXSkNCmFnZXNfZXh0ZW5kZWQ9Yw0KYGBgDQoNCiMjIFZlY3RvciBDb25kaXRpb25hbA0KYGBge3J9DQphZ2U9IGMoMTgsMjAsMTgsNDAsNTAsNjApDQpwcmludChhZ2UpDQoNCmFnZV81MD1hZ2VbYWdlPD01MF0NCmFnZV81
MA0KDQphZ2VfMjVfNTA9YWdlW2FnZT49MjUgJiBhZ2U8PTUwXQ0KYWdlXzI1XzUwDQoNCmFnZV8yNV81MFsxXQ0KDQpgYGANCg0KYGBge3J9DQphZ2VzW2FnZXMgPj0yNSAmIGFnZXM8PTUwXQ0KDQpgYGANCg0KIyMgU29ydGluZw0KDQpgYGB7cn0NCnNvcnRlZF9hZ2VzPXNvcnQoYWdlcykNCg0KDQpzb3J0KHNvcnRlZF9hZ2VzLCBUUlVFKQ0KDQpgYGANCg0KDQojIFNpbXBsZSBTdGF0aXN0aWNzDQoNCmBgYHtyfQ0KYWdlcw0KbWF4KGFnZXMpDQptYXgoYWdlc1syOjVdKQ0KbWluKGFnZXMpDQptZWFuKGFnZXMpDQptZWRpYW4oYWdlcykNCnN1bShhZ2VzKQ0Kc2QoYWdlcykNCnZhcihhZ2VzKQ0KcXVhbnRpbGUoYWdlcykNCmBgYA0KDQoNCmBgYHtyfQ0KbGVuZ3RoKGFnZXMpDQpzY29yZXM9IGMoMTAsIDIwLCAzMCw0MCw1MCw2MCw3MCw4MCkNCg0KbGVuZ3RoKHNjb3JlcykNCg0KY29yKGFnZXMsIHNjb3JlcykNCg0KYGBgDQoNCiMgRGF0YSBmcmFtZQ0KDQpgYGB7cn0NCmV4YW1fc2NvcmUgPSBkYXRhLmZyYW1lKA0KICBJRD0gYygxLCAyLCAzLCA0LCA1KSwNCiAgTmFtZXM9IGMoIkFtemFkIiwgIkthbGFtIiwgIlN1YmhhbiIsICJDaGFuZyIsICJCcmFuIiksDQogIEFnZT0gYygyMCwyMiwyMywyNCwyNSksDQogIFNjb3JlPSBjKDEwMCwgOTIsIDgwLDYwLDU1KQ0KKQ0KDQpwcmludChleGFtX3Njb3JlKQ0KYGBgDQoNCmBgYHtyfQ0KZXhhbV9zY29yZVsxOjMsIDM6NF0NCmV4YW1fc2NvcmVbMiwgMzo0XQ0KDQpleGFtX3Njb3JlWzIsIF0NCmV4YW1fc2NvcmVbICwgNF0NCg0KYGBgDQoNCmBgYHtyfQ0KZXhhbV9zY29yZVszOjQsIDFdDQpleGFtX3Njb3JlWyAsIDJdDQpgYGANCiMjIG1ha2UgdGFibGUgd2l0aCBvbmx5IG5hbWUgYW5kIGFnZQ0KYGBge3J9DQpleGFtX3Njb3JlJE5hbWVzDQpleGFtX3Njb3JlJEFnZQ0KDQpzZWxlY3RlZF9jb2x1bW5zPSBkYXRhLmZyYW1lKA0KICBOYW1lcz0gZXhhbV9zY29yZSROYW1lcywNCiAgQWdlPWV4YW1fc2NvcmUkQWdlDQopDQoNCnByaW50KHNlbGVjdGVkX2NvbHVtbiBzKQ0KYGBgDQojIyMgQWx0ZXJuYXRpdmUgd2F5DQpgYGB7cn0NCmV4YW1fc2NvcmVbYygxLDIsMyw0LDUpLCBjKDIsMyldDQpgYGANCg0KYGBge3J9DQpleGFtX3Njb3JlW2V4YW1fc2NvcmUkU2NvcmU+PTgwICYgZXhhbV9zY29yZSRTY29yZTw5NSwgXQ0KYGBgDQoNCg0KIyMjIHdoZW4geW91IGhhdmUgdG8gc2VsZWN0IHNwZWNpZmljIElEIGNvbnRhaW5pbmcgcm93cywgdXNlICVpbiUgLiB0aGlzIG9wZXJhdGlvbiB3aWxsIHNlbGVjdCBvbmx5IDEsNSBpbiBJRC4NCmBgYHtyfQ0Kc2VsZWN0ZWRfSUQ9IGMoMSw1KQ0KZXhhbV9zY29yZVtleGFtX3Njb3JlJElEICVpbiUgc2VsZWN0ZWRfSUQsIF0NCmBgYA0KDQpgYGB7cn0NCnNlbGVjdGVkX0FnZT0gYygyMCwyMikNCmV4YW1fc2NvcmVbZXhhbV9zY29yZSRBZ2UgJWluJSBzZWxlY3RlZF9BZ2UsIF0NCmBgYA0KDQoNCiMjIEFkZGluZyBuZXcgcm93cyBhbmQgY29sdW1uc3EN
Cg0KDQpgYGB7cn0NCmV4YW1fc2NvcmUNCm5ld19yb3c9IGMoMTAsICJTYW0iLCA1NSwgMTAwKQ0KDQpleGFtX3Njb3JlMj0gcmJpbmQoZXhhbV9zY29yZSwgbmV3X3JvdykNCg0KZXhhbV9zY29yZTINCg0KDQpMb2NhdGlvbiA9IGMoImRoayIsICJjdGciLCAiYmFycyIsICJqZmsiLCAibW1zIiwgImNtbCIpDQpleGFtX3Njb3JlMz0gY2JpbmQoZXhhbV9zY29yZTIsIExvY2F0aW9uKQ0KZXhhbV9zY29yZTMNCmBgYA0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg==