Question 1

Solution 1(a)

Separate all the strings and store them in a variable “gene_seqs”

# The sequences arrive as one string joined by "-"; break it into a character
# vector with one element per sequence and print the result on a single line.
given_combination <- "ATCGATCGATCG-ATCGAT-CGATC-GATCGAT-CGATCG-ATCGATCG-CGATCG"
gene_seqs <- strsplit(given_combination, split = "-")[[1]]
cat("(a) gene_seqs=", gene_seqs, "\n")
(a) gene_seqs= ATCGATCGATCG ATCGAT CGATC GATCGAT CGATCG ATCGATCG CGATCG 

Solution 1(b)

Using a loop, reverse all the strings in gene_seqs

# stringi supplies stri_reverse(), which reverses the characters of a string.
library(stringi)

# Walk the sequences by position and print "<index> <sequence> = <reversed>".
for (pos in seq_along(gene_seqs)) {
  current <- gene_seqs[[pos]]
  cat(pos, current, "=", stri_reverse(current), "\n")
}
1 ATCGATCGATCG = GCTAGCTAGCTA 
2 ATCGAT = TAGCTA 
3 CGATC = CTAGC 
4 GATCGAT = TAGCTAG 
5 CGATCG = GCTAGC 
6 ATCGATCG = GCTAGCTA 
7 CGATCG = GCTAGC 

Solution 1(c)

Using a loop, calculate the length of each string in gene_seqs

# Print "<index> <sequence> = <number of characters>" for every sequence.
for (pos in seq_along(gene_seqs)) {
  current <- gene_seqs[[pos]]
  cat(pos, current, "=", nchar(current), "\n")
}
1 ATCGATCGATCG = 12 
2 ATCGAT = 6 
3 CGATC = 5 
4 GATCGAT = 7 
5 CGATCG = 6 
6 ATCGATCG = 8 
7 CGATCG = 6 

Solution 1(d)

In gene_seqs, if a string length is greater than 7, print “Successful” else print “Error”. (use loop and condition)

# Classify every sequence by length: strictly more than 7 characters is
# reported as "successful", anything else (including exactly 7) as "error".
for (pos in seq_along(gene_seqs)) {
  current <- gene_seqs[[pos]]
  len <- nchar(current)
  verdict <- if (len > 7) ",successful \n" else ",error \n"
  cat(pos, current, "=", len, verdict)
}
1 ATCGATCGATCG = 12 ,successful 
2 ATCGAT = 6 ,error 
3 CGATC = 5 ,error 
4 GATCGAT = 7 ,error 
5 CGATCG = 6 ,error 
6 ATCGATCG = 8 ,successful 
7 CGATCG = 6 ,error 

Solution 1(e)

Find the locations and number of occurrences of pattern “GATC” in each string of the gene_seqs

# Report the start position of every "GATC" match in each sequence.
# gregexpr() yields -1 for a sequence that contains no match, and that -1 is
# printed as-is.
# NOTE(review): the task statement also asks for the number of occurrences,
# which this chunk does not print.
motif <- "GATC"
for (pos in seq_along(gene_seqs)) {
  current <- gene_seqs[[pos]]
  hits <- unlist(gregexpr(motif, current))
  cat(pos, current, ":", hits, "\n")
}
1 ATCGATCGATCG : 4 8 
2 ATCGAT : -1 
3 CGATC : 2 
4 GATCGAT : 1 
5 CGATCG : 2 
6 ATCGATCG : 4 
7 CGATCG : 2 

Bonus (+5): Solve b to e questions using one loop.

# Parts (b) to (e) in a single pass over the sequences.
# stri_reverse() comes from stringi, which has to be attached before this
# chunk runs.
given_combination <- "ATCGATCGATCG-ATCGAT-CGATC-GATCGAT-CGATCG-ATCGATCG-CGATCG"
gene_seqs <- strsplit(given_combination, split = "-")[[1]]

# The motif is the same for every sequence, so it is set once up front.
motif <- "GATC"

for (pos in seq_along(gene_seqs)) {
  gene <- gene_seqs[[pos]]

  # (e) match positions; gregexpr() gives -1 when there is no match, so the
  # occurrence count only tallies entries that are real positions.
  hits <- unlist(gregexpr(motif, gene))
  n_hits <- sum(hits != -1)

  # (d) strictly longer than 7 characters counts as successful.
  verdict <- if (nchar(gene) > 7) "(d)   successful" else "(d)   error"

  cat("[", pos, "]", "\n")
  cat("(b)reverse=", stri_reverse(gene), "\n")
  cat("(c) length of ", gene, "=", nchar(gene), "\n")
  cat(verdict, "\n")
  cat(
    "(e) Locations of GATC in ", gene, ":", hits,
    ", number of occurrences =", n_hits, "\n", "\n"
  )
}
[ 1 ] 
(b)reverse= GCTAGCTAGCTA 
(c) length of  ATCGATCGATCG = 12 
(d)   successful 
(e) Locations of GATC in  ATCGATCGATCG : 4 8 , number of occurrences = 2 
 
[ 2 ] 
(b)reverse= TAGCTA 
(c) length of  ATCGAT = 6 
(d)   error 
(e) Locations of GATC in  ATCGAT : -1 , number of occurrences = 0 
 
[ 3 ] 
(b)reverse= CTAGC 
(c) length of  CGATC = 5 
(d)   error 
(e) Locations of GATC in  CGATC : 2 , number of occurrences = 1 
 
[ 4 ] 
(b)reverse= TAGCTAG 
(c) length of  GATCGAT = 7 
(d)   error 
(e) Locations of GATC in  GATCGAT : 1 , number of occurrences = 1 
 
[ 5 ] 
(b)reverse= GCTAGC 
(c) length of  CGATCG = 6 
(d)   error 
(e) Locations of GATC in  CGATCG : 2 , number of occurrences = 1 
 
[ 6 ] 
(b)reverse= GCTAGCTA 
(c) length of  ATCGATCG = 8 
(d)   successful 
(e) Locations of GATC in  ATCGATCG : 4 , number of occurrences = 1 
 
[ 7 ] 
(b)reverse= GCTAGC 
(c) length of  CGATCG = 6 
(d)   error 
(e) Locations of GATC in  CGATCG : 2 , number of occurrences = 1 
 

Question 2

Solution 2(a)

Create the data frame. Add 2 new rows. Add a new column called “Income”. This column should be numerical.

# Initial table: five students with an id, name, age and exam score.
exam_score <- data.frame(
  ID    = c(1, 2, 3, 4, 5),
  Name  = c("Alice", "Bob", "David", "John", "Jenny"),
  Age   = c(20, 25, 30, 22, 18),
  Score = c(100, 78, 90, 55, 81)
)
exam_score

# Two extra students, laid out with the same columns so they can be appended.
new_row1 <- data.frame(
  ID    = c(6, 7),
  Name  = c("Aria", "Diana"),
  Age   = c(22, 24),
  Score = c(76, 78)
)
new_row1

# Append the new rows beneath the original five.
exam_score_2 <- rbind(exam_score, new_row1)

exam_score_2

# Numeric Income column, one value per student, attached as a fifth column.
Income <- data.frame(
  Income = c(15000, 20000, 12000, 25000, 30000, 22000, 18000)
)
exam_score_3 <- cbind(exam_score_2, Income)
exam_score_3
NA

Solution 2(b)

Find max, min, median, sum, mean, standard deviation, variance, quantiles of column Age, Score, and Income.

# Show the three numeric columns, then print the largest value of each.
# NOTE(review): the task statement also lists `sum`, which none of the 2(b)
# statements compute.
exam_score_3[, c("Age", "Score", "Income")]
cat(
  "max_age=", max(exam_score_3[["Age"]]), "\n",
  "max_score=", max(exam_score_3[["Score"]]), "\n",
  "max_income=", max(exam_score_3[["Income"]]), "\n", "\n"
)
max_age= 30 
 max_score= 100 
 max_income= 30000 
 
# Print the smallest value of each numeric column.
# Income is read from exam_score_3, like Age and Score, rather than from the
# separate one-column data frame that is also named `Income`; the statistic
# then always describes the same table as the other two values.
cat(
  "min_age=", min(exam_score_3$Age), "\n",
  "min_score=", min(exam_score_3$Score), "\n",
  "min_income=", min(exam_score_3$Income), "\n", "\n"
)
min_age= 18 
 min_score= 55 
 min_income= 12000 
 
# Print the median of each numeric column.
age_median <- median(exam_score_3[["Age"]])
score_median <- median(exam_score_3[["Score"]])
income_median <- median(exam_score_3[["Income"]])
cat(
  "median_age=", age_median, "\n",
  "median_score=", score_median, "\n",
  "median_income=", income_median, "\n", "\n"
)
median_age= 22 
 median_score= 78 
 median_income= 20000 
 
# Print the arithmetic mean of each numeric column.
cat(
  "mean_age=", mean(exam_score_3[["Age"]]), "\n",
  "mean_score=", mean(exam_score_3[["Score"]]), "\n",
  "mean_income=", mean(exam_score_3[["Income"]]), "\n", "\n"
)
mean_age= 23 
 mean_score= 79.71429 
 mean_income= 20285.71 
 
# Print the standard deviation of each numeric column.
age_sd <- sd(exam_score_3[["Age"]])
score_sd <- sd(exam_score_3[["Score"]])
income_sd <- sd(exam_score_3[["Income"]])
cat(
  "sd_age=", age_sd, "\n",
  "sd_score=", score_sd, "\n",
  "sd_income=", income_sd, "\n", "\n"
)
sd_age= 3.872983 
 sd_score= 13.84093 
 sd_income= 6074.929 
 
# Print the variance of each numeric column.
cat(
  "var_age=", var(exam_score_3[["Age"]]), "\n",
  "var_score=", var(exam_score_3[["Score"]]), "\n",
  "var_income=", var(exam_score_3[["Income"]]), "\n", "\n"
)
var_age= 15 
 var_score= 191.5714 
 var_income= 36904762 
 
# Quantiles of each numeric column, computed with quantile()'s default
# settings. cat() prints only the values, not the percentage labels that
# quantile() attaches to them.
age_quantiles <- quantile(exam_score_3[["Age"]])
score_quantiles <- quantile(exam_score_3[["Score"]])
income_quantiles <- quantile(exam_score_3[["Income"]])

cat(
  "quantile_age=", age_quantiles, "\n",
  "quantile_score=", score_quantiles, "\n",
  "quantile_income=", income_quantiles, "\n", "\n"
)
quantile_age= 18 21 22 24.5 30 
 quantile_score= 55 77 78 85.5 100 
 quantile_income= 12000 16500 20000 23500 30000 
 

Solution 2(c)

Find correlation between i. Age and score, ii. Age and income, iii. Score and income

# Correlation between Age and Score, using cor()'s default method.
age_score_cor <- cor(exam_score_3[["Age"]], exam_score_3[["Score"]])
cat("correlation between Age and score =", age_score_cor, "\n")
correlation between Age and score = 0.08705541 
# Correlation between Age and Income, using cor()'s default method.
age_income_cor <- cor(exam_score_3[["Age"]], exam_score_3[["Income"]])
cat("correlation between Age and income = ", age_income_cor, "\n")
correlation between Age and income =  -0.7083729 
# Correlation between Score and Income, using cor()'s default method.
score_income_cor <- cor(exam_score_3[["Score"]], exam_score_3[["Income"]])
cat("correlation between Score and income =", score_income_cor, "\n")
correlation between Score and income = -0.5915379 

Solution 2(d)

Select rows where the score is greater than or equal to 80

# Keep only the students whose score is 80 or higher (all columns retained).
scored_80_plus <- exam_score_3[["Score"]] >= 80
exam_score_3[scored_80_plus, ]

Solution 2(e)

Select rows with age range 20 to 30

# Keep only the students aged 20 to 30, both bounds included.
student_age <- exam_score_3[["Age"]]
aged_20_to_30 <- student_age >= 20 & student_age <= 30
exam_score_3[aged_20_to_30, ]
LS0tDQp0aXRsZTogIkFzc2lnbm1lbnQgMiINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQojIFF1ZXN0aW9uIDENCiMjIFNvbHV0aW9uIDEoYSkNCiMjIyBTZXBhcmF0ZSBhbGwgdGhlIHN0cmluZ3MgYW5kIHN0b3JlIGl0IGluIGEgdmFyaWFibGUg4oCcZ2VuZV9zZXFz4oCdDQpgYGB7cn0NCmdpdmVuX2NvbWJpbmF0aW9uID0gIkFUQ0dBVENHQVRDRy1BVENHQVQtQ0dBVEMtR0FUQ0dBVC1DR0FUQ0ctQVRDR0FUQ0ctQ0dBVENHIg0KZ2VuZV9zZXFzPXVubGlzdChzdHJzcGxpdChnaXZlbl9jb21iaW5hdGlvbiwgIi0iKSkNCmNhdCgiKGEpIGdlbmVfc2Vxcz0iLGdlbmVfc2VxcywgJ1xuJykNCg0KDQpgYGANCg0KDQojIyBTb2x1dGlvbiAxKGIpDQojIyMgVXNpbmcgYSBsb29wIHJldmVyc2UgYWxsIHRoZSBzdHJpbmdzIGluIGdlbmVfc2Vxcw0KYGBge3J9DQpsaWJyYXJ5KHN0cmluZ2kpIA0KaT0xDQpmb3IociBpbiBnZW5lX3NlcXMpew0KDQogY2F0KGksIHIsICI9Iiwgc3RyaV9yZXZlcnNlKHIpLCdcbicpDQogaT0xK2kNCn0NCg0KYGBgDQojIyBTb2x1dGlvbiAxKGMpDQojIyMgVXNpbmcgYSBsb29wIGNhbGN1bGF0ZSBsZW5ndGggb2YgZWFjaCBzdHJpbmcgaW4gZ2VuZV9zZXFzDQpgYGB7cn0NCmk9MQ0KZm9yKGcgaW4gZ2VuZV9zZXFzKSB7DQoNCiAgY2F0KGksIGcsICI9IixuY2hhcihnKSwnXG4nKQ0KICBpPTEraQ0KfQ0KYGBgDQoNCg0KIyMgU29sdXRpb24gMShkKQ0KIyMjIEluIGdlbmVfc2VxcywgaWYgYSBzdHJpbmcgbGVuZ3RoIGlzIGdyZWF0ZXIgdGhhbiA3LCBwcmludCDigJxTdWNjZXNzZnVs4oCdIGVsc2UgcHJpbnQg4oCcRXJyb3LigJ0uICh1c2UgbG9vcCBhbmQgY29uZGl0aW9uKQ0KDQoNCmBgYHtyfQ0KaT0xDQpmb3IocyBpbiBnZW5lX3NlcXMpIHsNCiAgDQogIGlmKG5jaGFyKHMpID4gNykgew0KICAgIGNhdChpLCBzLCI9IixuY2hhcihzKSwgIixzdWNjZXNzZnVsIFxuIikNCiAgfSANCiAgZWxzZSB7DQogICAgY2F0KGksIHMsIj0iLChuY2hhcihzKSksIixlcnJvciBcbiIpDQogIH0NCiAgaT0xK2kNCiAgDQp9DQoNCmBgYA0KIyMgU29sdXRpb24gMShlKQ0KIyMjIEZpbmQgdGhlIGxvY2F0aW9ucyBhbmQgbnVtYmVyIG9mIG9jY3VycmVuY2VzIG9mIHBhdHRlcm4g4oCcR0FUQ+KAnSBpbiBlYWNoIHN0cmluZyBvZiB0aGUgZ2VuZV9zZXFzDQpgYGB7cn0NCnBhdHRlcm4gPSAoJ0dBVEMnKQ0KaT0xDQpmb3IgKG4gaW4gZ2VuZV9zZXFzKXsNCiAgbG9jYXRpb25zID0gZ3JlZ2V4cHIocGF0dGVybiwgbikNCmxvY2F0aW9ucyA9IHVubGlzdChsb2NhdGlvbnMpDQpjYXQoaSxuLCI6Iixsb2NhdGlvbnMsJ1xuJykNCmk9MStpDQp9DQpgYGANCiMgQm9udXMgKCs1KTogU29sdmUgYiB0byBlIHF1ZXN0aW9ucyB1c2luZyBvbmUgbG9vcC4gDQoNCg0KDQpgYGB7cn0NCmdpdmVuX2NvbWJpbmF0aW9uID0gIkFUQ0dBVENHQVRDRy1BVENHQVQtQ0dBVEMtR0FUQ0dBVC1DR0FUQ0ctQVRDR0FUQ0ctQ0dBVENHIg0KZ2VuZV9z
ZXFzPXVubGlzdChzdHJzcGxpdChnaXZlbl9jb21iaW5hdGlvbiwgIi0iKSkNCmk9MQ0KZm9yIChnZW5lIGluIGdlbmVfc2Vxcyl7DQogIGNhdCgiWyIsaSwiXSIsJ1xuJykNCiAgaT0xK2kNCiAgY2F0KCIoYilyZXZlcnNlPSIsIHN0cmlfcmV2ZXJzZShnZW5lKSwnXG4nKQ0KICBjYXQoIihjKSBsZW5ndGggb2YgIixnZW5lLCAiPSIsbmNoYXIoZ2VuZSksJ1xuJykNCiAgaWYobmNoYXIoZ2VuZSkgPiA3KSB7DQogICAgY2F0KCIoZCkgICBzdWNjZXNzZnVsIiwnXG4nKQ0KICB9IA0KICBlbHNlIHsNCiAgICBjYXQoIihkKSAgIGVycm9yIiwnXG4nKQ0KICB9DQpwYXR0ZXJuID0gKCdHQVRDJykNCiAgbG9jYXRpb25zID0gZ3JlZ2V4cHIocGF0dGVybiwgZ2VuZSkNCmxvY2F0aW9ucyA9IHVubGlzdChsb2NhdGlvbnMpDQpubT0gc3VtKGxvY2F0aW9ucyAhPSAtMSkNCmNhdCgiKGUpIExvY2F0aW9ucyBvZiBHQVRDIGluICIsZ2VuZSwiOiIsbG9jYXRpb25zLCIsIG51bWJlciBvZiBvY2N1cnJlbmNlcyA9IixubSwnXG4nLCdcbicpDQp9ICANCg0KDQoNCg0KDQoNCmBgYA0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCiMgUXVlc3Rpb24gMg0KIyMgU29sdXRpb24gMihhKQ0KIyMjIENyZWF0ZSB0aGUgZGF0YSBmcmFtZS4gQWRkIDIgbmV3IHJvd3MuIEFkZCBhIG5ldyBjb2x1bW4gY2FsbGVkIOKAnEluY29tZeKAnS4gVGhpcyBjb2x1bW4gc2hvdWxkIGJlIG51bWVyaWNhbC4gDQpgYGB7cn0NCmV4YW1fc2NvcmUgPSBkYXRhLmZyYW1lKA0KICBJRCA9IGMoMSwgMiwgMywgNCwgNSksDQogIE5hbWUgPSBjKCJBbGljZSIsICJCb2IiLCAiRGF2aWQiLCAiSm9obiIsICJKZW5ueSIpLA0KICBBZ2UgPSBjKDIwLCAyNSwgMzAsIDIyLCAxOCksDQogIFNjb3JlID0gYygxMDAsIDc4LCA5MCwgNTUsIDgxKQ0KDQopDQpleGFtX3Njb3JlDQpuZXdfcm93MSA9IGRhdGEuZnJhbWUoDQpJRCA9IGMoNiw3KSwNCiAgTmFtZSA9IGMoIkFyaWEiLCJEaWFuYSIpLA0KICBBZ2UgPSBjKDIyLDI0KSwNCiAgU2NvcmUgPSBjKDc2LCA3OCkgIA0KKQ0KbmV3X3JvdzENCg0KZXhhbV9zY29yZV8yID0gcmJpbmQoZXhhbV9zY29yZSwgbmV3X3JvdzEpDQoNCmV4YW1fc2NvcmVfMg0KDQpJbmNvbWUgPSBkYXRhLmZyYW1lKA0KICBJbmNvbWU9YygxNTAwMCwgMjAwMDAsMTIwMDAsMjUwMDAsMzAwMDAsMjIwMDAsMTgwMDApKQ0KZXhhbV9zY29yZV8zID0gY2JpbmQoZXhhbV9zY29yZV8yLEluY29tZSkNCmV4YW1fc2NvcmVfMw0KDQpgYGANCg0KIyMgU29sdXRpb24gMihiKQ0KIyMjIEZpbmQgbWF4LCBtaW4sIG1lZGlhbiwgc3VtLCBtZWFuLCBzdGFuZGFyZCBkZXZpYXRpb24sIHZhcmlhbmNlLCBxdWFudGlsZXMgb2YgY29sdW1uIEFnZSwgU2NvcmUsIGFuZCBJbmNvbWUuIA0KYGBge3J9DQpleGFtX3Njb3JlXzNbICwgMzo1IF0NCmNhdCgibWF4X2FnZT0iLG1heChleGFtX3Njb3JlXzMkQWdlKSwnXG4nLCJtYXhfc2NvcmU9IixtYXgoZXhhbV9z
Y29yZV8zJFNjb3JlKSwnXG4nLCJtYXhfaW5jb21lPSIsbWF4KGV4YW1fc2NvcmVfMyRJbmNvbWUpLCdcbicsJ1xuJykNCg0KY2F0KCJtaW5fYWdlPSIsIG1pbihleGFtX3Njb3JlXzMkQWdlKSwnXG4nLCJtaW5fc2NvcmU9IiwgbWluKGV4YW1fc2NvcmVfMyRTY29yZSksJ1xuJywibWluX2luY29tZT0iLCBtaW4oSW5jb21lKSwnXG4nLCdcbicpDQoNCmNhdCgibWVkaWFuX2FnZT0iLG1lZGlhbihleGFtX3Njb3JlXzMkQWdlKSwnXG4nLCJtZWRpYW5fc2NvcmU9IiwgbWVkaWFuKGV4YW1fc2NvcmVfMyRTY29yZSksJ1xuJywibWVkaWFuX2luY29tZT0iLCBtZWRpYW4oZXhhbV9zY29yZV8zJEluY29tZSksJ1xuJywnXG4nKQ0KDQoNCmNhdCgibWVhbl9hZ2U9IiwgbWVhbihleGFtX3Njb3JlXzMkQWdlKSwnXG4nLCJtZWFuX3Njb3JlPSIsIG1lYW4oZXhhbV9zY29yZV8zJFNjb3JlKSwnXG4nLCJtZWFuX2luY29tZT0iLG1lYW4oZXhhbV9zY29yZV8zJEluY29tZSksJ1xuJywnXG4nKQ0KDQpjYXQoInNkX2FnZT0iLCBzZChleGFtX3Njb3JlXzMkQWdlKSwnXG4nLCJzZF9zY29yZT0iLCBzZChleGFtX3Njb3JlXzMkU2NvcmUpLCdcbicsInNkX2luY29tZT0iLCBzZChleGFtX3Njb3JlXzMkSW5jb21lKSwnXG4nLCdcbicpDQoNCmNhdCgidmFyX2FnZT0iLCB2YXIoZXhhbV9zY29yZV8zJEFnZSksJ1xuJywidmFyX3Njb3JlPSIsIHZhcihleGFtX3Njb3JlXzMkU2NvcmUpLCdcbicsInZhcl9pbmNvbWU9IiwgdmFyKGV4YW1fc2NvcmVfMyRJbmNvbWUpLCdcbicsJ1xuJykNCg0KIwlxdWFudGlsZShleGFtX3Njb3JlXzMkQWdlKQ0KIwlxdWFudGlsZShleGFtX3Njb3JlXzMkU2NvcmUpDQojCXF1YW50aWxlKGV4YW1fc2NvcmVfMyRJbmNvbWUpDQoNCmNhdCgicXVhbnRpbGVfYWdlPSIsIHF1YW50aWxlKGV4YW1fc2NvcmVfMyRBZ2UpLCdcbicsICJxdWFudGlsZV9zY29yZT0iLCBxdWFudGlsZShleGFtX3Njb3JlXzMkU2NvcmUpLCdcbicsDQoicXVhbnRpbGVfaW5jb21lPSIsIHF1YW50aWxlKGV4YW1fc2NvcmVfMyRJbmNvbWUpLCdcbicsJ1xuJykNCg0KYGBgDQojIyBTb2x1dGlvbiAyKGMpDQojIyMgRmluZCBjb3JyZWxhdGlvbiBiZXR3ZWVuIGkuCUFnZSBhbmQgc2NvcmUsIGlpLglBZ2UgYW5kIGluY29tZSwgaWlpLglTY29yZSBhbmQgaW5jb21lDQoNCg0KYGBge3J9DQpjYXQoImNvcnJlbGF0aW9uIGJldHdlZW4gQWdlIGFuZCBzY29yZSA9Iixjb3IoZXhhbV9zY29yZV8zJEFnZSwgZXhhbV9zY29yZV8zJFNjb3JlKSwnXG4nKQ0KY2F0KCJjb3JyZWxhdGlvbiBiZXR3ZWVuIEFnZSBhbmQgaW5jb21lID0gIixjb3IoZXhhbV9zY29yZV8zJEFnZSwgZXhhbV9zY29yZV8zJEluY29tZSksJ1xuJykNCmNhdCgiY29ycmVsYXRpb24gYmV0d2VlbiBTY29yZSBhbmQgaW5jb21lID0iLGNvcihleGFtX3Njb3JlXzMkU2NvcmUsIGV4YW1fc2NvcmVfMyRJbmNvbWUpLCdcbicpDQpgYGANCiMjIFNvbHV0aW9uIDIoZCkNCiMjIyBTZWxlY3Qgcm93cyB3aGVyZSB0
aGUgc2NvcmUgaXMgZ3JlYXRlciB0aGFuIG9yIGVxdWFsIHRvIDgwDQpgYGB7cn0NCmV4YW1fc2NvcmVfM1sgZXhhbV9zY29yZV8zJFNjb3JlID49IDgwLCBdDQpgYGANCg0KIyMgU29sdXRpb24gMihlKQ0KIyMjIFNlbGVjdCByb3dzIHdpdGggYWdlIHJhbmdlIDIwIHRvIDMwICANCmBgYHtyfQ0KZXhhbV9zY29yZV8zW2V4YW1fc2NvcmVfMyRBZ2UgPj0gMjAgJiBleGFtX3Njb3JlXzMkQWdlIDw9IDMwLCBdDQpgYGANCg0KDQoNCg0K