Question 1

a) Separation of string

# Raw DNA string; "-" marks the boundary between gene segments.
sequence <- "ATCGATCGATCG-ATCGAT-CGATC-GATCGAT-CGATCG-ATCGATCG-CGATCG"

# strsplit() returns a list with one element per input string, so the
# segments end up in gene_seqs[[1]].
gene_seqs <- strsplit(sequence, split = "-", fixed = TRUE)
print(gene_seqs)
[[1]]
[1] "ATCGATCGATCG" "ATCGAT"       "CGATC"        "GATCGAT"      "CGATCG"       "ATCGATCG"     "CGATCG"      

Questions b) to e) in a loop

# Parts b) to e): reversed segments, segment lengths, pattern positions and
# pattern count.
#
# gene_seqs is the list produced by strsplit() and holds a single character
# vector, so this loop body runs once with `i` bound to the whole vector of
# segments. Every call below is vectorised over that vector.
for (i in gene_seqs) {
  # b) Reverse each segment. The call is namespace-qualified so it works even
  # when stringi has not been attached with library(stringi).
  reverse_seq <- stringi::stri_reverse(i)
  print(reverse_seq)

  # c) Number of characters in each segment.
  seg_len <- nchar(i)
  print(seg_len)

  # d) Start positions of every match of the pattern; gregexpr() reports -1
  # for a segment that contains no match.
  new_pattern <- "GATC"
  location <- gregexpr(new_pattern, i)
  location <- unlist(location)
  print(location)

  # e) Total number of matches across all segments (the -1 "no match"
  # markers are excluded from the count).
  occurance <- sum(location != -1)
  print(occurance)
}
[1] "GCTAGCTAGCTA" "TAGCTA"       "CTAGC"        "TAGCTAG"      "GCTAGC"       "GCTAGCTA"     "GCTAGC"      
[1] 12  6  5  7  6  8  6
[1]  4  8 -1  2  1  2  4  2
[1] 7

Question 2

# Base exam table: one row per student, four columns.
exam_score <- data.frame(
  ID    = as.numeric(1:5),  # kept as double, matching the later rows
  Name  = c("Alice", "Bob", "David", "John", "Jenny"),
  Age   = c(20, 25, 30, 22, 18),
  Score = c(100, 78, 90, 55, 81)
)
print(exam_score)

a) Row addition

# Two additional students, laid out with the same columns as exam_score so
# that rbind() can stack them underneath the existing rows.
new_row <- data.frame(
  ID    = c(6, 7),
  Name  = c("Eli", "Farha"),
  Age   = c(22, 16),
  Score = c(60, 85)
)

# Row-bind the original table and the new rows into one data frame.
new_score1 <- rbind(exam_score, new_row)
print(new_score1)

a) Column addition

# One income value per row of new_score1 (seven students in total).
Income <- c(1200, 1500, 1100, 2700, 1500, 1750, 2200)

# Append the vector as a fifth column named "Income".
new_score2 <- cbind(new_score1, Income = Income)
print(new_score2)

b) Statistics of data frame

# Largest value of each numeric column (Age, Score, Income).
cat(
  "Maximum of age ", max(new_score2$Age),
  "Maximum of score ", max(new_score2$Score),
  "Maximum of income ", max(new_score2$Income),
  "\n"
)
Maximum of age  30 Maximum of score  100 Maximum of income  2700 
# Smallest value of each numeric column (Age, Score, Income).
cat(
  "Minimum of age ", min(new_score2$Age),
  "Minimum of score ", min(new_score2$Score),
  "Minimum of income ", min(new_score2$Income),
  "\n"
)
Minimum of age  16 Minimum of score  55 Minimum of income  1100 
# Median of each numeric column (Age, Score, Income).
cat(
  "Median of age ", median(new_score2$Age),
  "Median of score ", median(new_score2$Score),
  "Median of income ", median(new_score2$Income),
  "\n"
)
Median of age  22 Median of score  81 Median of income  1500 
# Column totals for Age, Score and Income.
cat(
  "Sum of age ", sum(new_score2$Age),
  "Sum of score ", sum(new_score2$Score),
  "Sum of income ", sum(new_score2$Income),
  "\n"
)
Sum of age  153 Sum of score  549 Sum of income  11950 
# Arithmetic mean of each numeric column (Age, Score, Income).
cat(
  "Mean of age ", mean(new_score2$Age),
  "Mean of score ", mean(new_score2$Score),
  "Mean of income ", mean(new_score2$Income),
  "\n"
)
Mean of age  21.85714 Mean of score  78.42857 Mean of income  1707.143 
# Standard deviation of each numeric column, computed with sd().
# The label text is kept exactly as written so the printed output is unchanged.
cat(
  "Standar deviation of age ", sd(new_score2$Age),
  "Standar deviation of score ", sd(new_score2$Score),
  "Standar deviation of income ", sd(new_score2$Income),
  "\n"
)
Standar deviation of age  4.634241 Standar deviation of score  16.00893 Standar deviation of income  568.938 
# Variance of each numeric column, computed with var().
# The label text is kept exactly as written so the printed output is unchanged.
cat(
  "variance of age ", var(new_score2$Age),
  "Variance of score ", var(new_score2$Score),
  "variance of income ", var(new_score2$Income),
  "\n"
)
variance of age  21.47619 Variance of score  256.2857 variance of income  323690.5 
# Quantiles of each numeric column, using quantile()'s default probabilities
# (five values per column). cat() prints only the values, not the names
# attached by quantile().
# The trailing "\n" matches the other summary calls, so that whatever is
# printed next starts on a fresh line.
cat(
  "Quantile of age ", quantile(new_score2$Age),
  "Quantile of score ", quantile(new_score2$Score),
  "Quantile of income ", quantile(new_score2$Income),
  "\n"
)
Quantile of age  16 19 22 23.5 30 Quantile of score  55 69 81 87.5 100 Quantile of income  1100 1350 1500 1975 2700

c) Correlation

# Correlation between age and exam score (cor() with its default method).
correlationi <- with(new_score2, cor(Age, Score))
print(correlationi)
[1] -0.003530227
# Correlation between age and income (cor() with its default method).
correlationii <- with(new_score2, cor(Age, Income))
print(correlationii)
[1] -0.4167531
# Correlation between exam score and income (cor() with its default method).
correlationiii <- with(new_score2, cor(Score, Income))
print(correlationiii)
[1] -0.7177034

d) Conditional selection

# Rows whose Score is at least 80; all columns are kept.
new_score2[new_score2[["Score"]] >= 80, ]

e) Conditional selection with range

# Rows whose Age is strictly between 20 and 30; all columns are kept.
new_score2[with(new_score2, Age > 20 & Age < 30), ]
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIFF1ZXN0aW9uIDENCg0KIyMgYSkgU2VwYXJhdGlvbiBvZiBzdHJpbmcNCg0KYGBge3J9DQpzZXF1ZW5jZSA9ICJBVENHQVRDR0FUQ0ctQVRDR0FULUNHQVRDLUdBVENHQVQtQ0dBVENHLUFUQ0dBVENHLUNHQVRDRyINCmdlbmVfc2VxcyA9IHN0cnNwbGl0KHNlcXVlbmNlLCAnLScpDQpwcmludChnZW5lX3NlcXMpDQpgYGANCiMjIFF1ZXN0aW9uIGIgdG8gZSBpbiBhIGxvb3AgDQoNCmBgYHtyfQ0KZm9yKGkgaW4gZ2VuZV9zZXFzKSB7DQogIA0KcmV2ZXJzZV9zZXEgPSBzdHJpX3JldmVyc2UoaSkNCnByaW50KHJldmVyc2Vfc2VxKSANCg0Kc2VnX2xlbiA9IG5jaGFyKGkpDQpwcmludChzZWdfbGVuKQ0KDQpuZXdfcGF0dGVybiA9ICdHQVRDJw0KbG9jYXRpb24gPSBncmVnZXhwcihuZXdfcGF0dGVybiwgaSkNCmxvY2F0aW9uID0gdW5saXN0KGxvY2F0aW9uKQ0KcHJpbnQobG9jYXRpb24pDQoNCm9jY3VyYW5jZSA9IHN1bShsb2NhdGlvbiAhPSAtMSkNCnByaW50KG9jY3VyYW5jZSkNCn0NCmBgYA0KIyBRdWVzdGlvbiAyDQoNCmBgYHtyfQ0KZXhhbV9zY29yZSA9IGRhdGEuZnJhbWUoDQogIElEID0gYygxLCAyLCAzLCA0LCA1KSwNCiAgTmFtZSA9IGMoIkFsaWNlIiwgIkJvYiIsICJEYXZpZCIsICJKb2huIiwgIkplbm55IiksDQogIEFnZSA9IGMoMjAsIDI1LCAzMCwgMjIsIDE4KSwNCiAgU2NvcmUgPSBjKDEwMCwgNzgsIDkwLCA1NSwgODEpDQopDQpwcmludChleGFtX3Njb3JlKQ0KYGBgDQojIyBhKSBSb3cgYWRkaXRpb24NCg0KYGBge3J9DQpuZXdfcm93ID0gZGF0YS5mcmFtZSgNCiAgSUQgPSBjKDYsIDcpLA0KICBOYW1lID0gYygiRWxpIiwgIkZhcmhhIiksDQogIEFnZSA9IGMoMjIsIDE2KSwNCiAgU2NvcmUgPSBjKDYwLCA4NSkNCikNCg0KbmV3X3Njb3JlMSA9IHJiaW5kKGV4YW1fc2NvcmUsIG5ld19yb3cpDQpwcmludChuZXdfc2NvcmUxKQ0KYGBgDQoNCiMjIGEpIENvbHVtbiBhZGRpdGlvbg0KDQpgYGB7cn0NCkluY29tZSA9IGMoMTIwMCwgMTUwMCwgMTEwMCwgMjcwMCwgMTUwMCwgMTc1MCwgMjIwMCkNCm5ld19zY29yZTIgPSBjYmluZChuZXdfc2NvcmUxLCBJbmNvbWUpDQpwcmludChuZXdfc2NvcmUyKQ0KYGBgDQojIyBiKSBTdGF0aXN0aWNzIG9mIGRhdGEgZnJhbWUNCg0KYGBge3J9DQpjYXQoIk1heGltdW0gb2YgYWdlICIsIG1heChuZXdfc2NvcmUyWyAsM10pLCAiTWF4aW11bSBvZiBzY29yZSAiLCBtYXgobmV3X3Njb3JlMlsgLDRdKSwgIk1heGltdW0gb2YgaW5jb21lICIsIG1heChuZXdfc2NvcmUyWyAsNV0pLCAiXG4iKQ0KY2F0KCJNaW5pbXVtIG9mIGFnZSAiLCBtaW4obmV3X3Njb3JlMlsgLDNdKSwgIk1pbmltdW0gb2Ygc2NvcmUgIiwgbWluKG5ld19zY29yZTJbICw0XSksICJNaW5pbXVtIG9mIGluY29tZSAiLCBtaW4obmV3X3Njb3JlMlsgLDVdKSwgIlxuIikNCmNhdCgiTWVkaWFuIG9mIGFnZSAiLCBtZWRp
YW4obmV3X3Njb3JlMlsgLDNdKSwgIk1lZGlhbiBvZiBzY29yZSAiLCBtZWRpYW4obmV3X3Njb3JlMlsgLDRdKSwgIk1lZGlhbiBvZiBpbmNvbWUgIiwgbWVkaWFuKG5ld19zY29yZTJbICw1XSksICJcbiIpDQpjYXQoIlN1bSBvZiBhZ2UgIiwgc3VtKG5ld19zY29yZTJbICwzXSksICJTdW0gb2Ygc2NvcmUgIiwgc3VtKG5ld19zY29yZTJbICw0XSksICJTdW0gb2YgaW5jb21lICIsIHN1bShuZXdfc2NvcmUyWyAsNV0pLCAiXG4iKQ0KY2F0KCJNZWFuIG9mIGFnZSAiLCBtZWFuKG5ld19zY29yZTJbICwzXSksICJNZWFuIG9mIHNjb3JlICIsIG1lYW4obmV3X3Njb3JlMlsgLDRdKSwgIk1lYW4gb2YgaW5jb21lICIsIG1lYW4obmV3X3Njb3JlMlsgLDVdKSwgIlxuIikNCmNhdCgiU3RhbmRhciBkZXZpYXRpb24gb2YgYWdlICIsIHNkKG5ld19zY29yZTJbICwzXSksICJTdGFuZGFyIGRldmlhdGlvbiBvZiBzY29yZSAiLCBzZChuZXdfc2NvcmUyWyAsNF0pLCAiU3RhbmRhciBkZXZpYXRpb24gb2YgaW5jb21lICIsIHNkKG5ld19zY29yZTJbICw1XSksICJcbiIpDQpjYXQoInZhcmlhbmNlIG9mIGFnZSAiLCB2YXIobmV3X3Njb3JlMlsgLDNdKSwgIlZhcmlhbmNlIG9mIHNjb3JlICIsIHZhcihuZXdfc2NvcmUyWyAsNF0pLCAidmFyaWFuY2Ugb2YgaW5jb21lICIsIHZhcihuZXdfc2NvcmUyWyAsNV0pLCAiXG4iKQ0KY2F0KCJRdWFudGlsZSBvZiBhZ2UgIixxdWFudGlsZShuZXdfc2NvcmUyWyAsM10pLCAiUXVhbnRpbGUgb2Ygc2NvcmUgIiwgcXVhbnRpbGUobmV3X3Njb3JlMlsgLDRdKSwgIlF1YW50aWxlIG9mIGluY29tZSAiLCBxdWFudGlsZShuZXdfc2NvcmUyWyAsNV0pKQ0KYGBgDQojIyBjKSBDb3JyZWxhdGlvbg0KDQpgYGB7cn0NCmNvcnJlbGF0aW9uaSA9IGNvcihuZXdfc2NvcmUyJEFnZSwgbmV3X3Njb3JlMiRTY29yZSkNCnByaW50KGNvcnJlbGF0aW9uaSkNCmNvcnJlbGF0aW9uaWkgPSBjb3IobmV3X3Njb3JlMiRBZ2UsIG5ld19zY29yZTIkSW5jb21lKQ0KcHJpbnQoY29ycmVsYXRpb25paSkNCmNvcnJlbGF0aW9uaWlpID0gY29yKG5ld19zY29yZTIkU2NvcmUsIG5ld19zY29yZTIkSW5jb21lKQ0KcHJpbnQoY29ycmVsYXRpb25paWkpDQpgYGANCiMjIGQpIENvbmRpdGlvbmFsIHNlbGVjdGlvbg0KYGBge3J9DQpuZXdfc2NvcmUyW25ld19zY29yZTIkU2NvcmUgPj04MCwgXQ0KYGBgDQoNCiMjIGUpIENvbmRpdGlvbmFsIHNlbGVjdGlvbiB3aXRoIHJhbmdlDQpgYGB7cn0NCm5ld19zY29yZTJbbmV3X3Njb3JlMiRBZ2UgPiAyMCAmIG5ld19zY29yZTIkQWdlIDwgMzAsIF0NCmBgYA0KDQo=