R Markdown

Leer los archivos FASTA

library("seqinr")
library("ggplot2")
library("stringr")
# Load each viral genome from its FASTA file with read.fasta() (seqinr).
# The rest of the analysis only uses the first entry (`[[1]]`) of each object.
zika <- read.fasta(file = "Base de datos/sequencesZIKA.fasta")
sarscovid <- read.fasta(file = "Base de datos/sequencesCovid.fasta")
# NOTE(review): this object is named `wuhan` but is read from the SARS1 file;
# confirm which virus it is meant to represent.
wuhan <- read.fasta(file = "Base de datos/sequencesSARS1.fasta")
mers <- read.fasta(file = "Base de datos/sequencesMERS.fasta")
dengue <- read.fasta(file = "Base de datos/sequences dengue.fasta")
  1. ¿Cuál es el tamaño de cada secuencia?
# Size of each genome: number of elements in the first sequence of every
# FASTA object. The `##` lines are the output recorded when the document was
# rendered.
length(zika[[1]])
## [1] 10794
length(wuhan[[1]])
## [1] 29751
length(mers[[1]])
## [1] 30111
length(dengue[[1]])
## [1] 10735
length(sarscovid[[1]])
## [1] 29903
  1. ¿Cuál es la composición de nucleótidos de cada secuencia?
# Nucleotide composition: count() with a word size of 1 tallies the single
# bases (a, c, g, t) of the first sequence of each virus. The `##` lines are
# the output recorded when the document was rendered.
count(zika[[1]], wordsize = 1)
## 
##    a    c    g    t 
## 2991 2359 3139 2305
count(sarscovid[[1]], wordsize = 1)
## 
##    a    c    g    t 
## 8954 5492 5863 9594
count(mers[[1]], wordsize = 1)
## 
##    a    c    g    t 
## 7897 6096 6303 9815
count(wuhan[[1]], wordsize = 1)
## 
##    a    c    g    t 
## 8481 5940 6187 9143
count(dengue[[1]], wordsize = 1)
## 
##    a    c    g    t 
## 3426 2240 2770 2299
  1. ¿Cuál es el contenido de GC de cada virus?
# GC content of the first sequence of each virus, scaled by 100 so the value
# reads as a percentage. The `##` lines are the output recorded when the
# document was rendered.
100 * GC(zika[[1]])
## [1] 50.93571
100 * GC(dengue[[1]])
## [1] 46.66977
100 * GC(sarscovid[[1]])
## [1] 37.97278
100 * GC(mers[[1]])
## [1] 41.17764
100 * GC(wuhan[[1]])
## [1] 40.76166
  1. Crear una función para obtener la secuencia complementaria e imprimirla para cada secuencia
# Returns the complementary strand of a DNA sequence (a <-> t, c <-> g).
#
# secuencia: character vector with one nucleotide per element, such as the
#            first entry of an object returned by read.fasta().
#
# Thin wrapper around comp() (seqinr) so the exercise has its own function.
complementaria <- function(secuencia) {
  comp(secuencia)
}

# Returns the first `n` and the last `n` elements of `secuencia`, so a long
# genome can be previewed without printing all of it.
#
# secuencia: vector to preview.
# n:         number of elements to keep from each end (default 10).
#
# The previous inline index, (length(x) - 10):length(x), selected 11 trailing
# elements against 10 leading ones; both ends now have exactly `n`.
extremos <- function(secuencia, n = 10) {
  total <- length(secuencia)
  if (total <= 2 * n) {
    # Too short for two non-overlapping ends: return every element once.
    return(secuencia[seq_len(total)])
  }
  secuencia[c(seq_len(n), seq.int(total - n + 1, total))]
}

# For each virus: the original strand, then its complement.
# The `##` lines are the output of the original render minus the extra
# 11th-from-last base that the old index included.
extremos(zika[[1]])
##  [1] "a" "g" "t" "t" "g" "t" "t" "g" "a" "t" "c" "a" "t" "g" "g" "t" "t" "t" "c"
## [20] "t"
extremos(complementaria(zika[[1]]))
##  [1] "t" "c" "a" "a" "c" "a" "a" "c" "t" "a" "g" "t" "a" "c" "c" "a" "a" "a" "g"
## [20] "a"
extremos(dengue[[1]])
##  [1] "a" "g" "t" "t" "g" "t" "t" "a" "g" "t" "a" "a" "c" "a" "g" "g" "t" "t" "c"
## [20] "t"
extremos(complementaria(dengue[[1]]))
##  [1] "t" "c" "a" "a" "c" "a" "a" "t" "c" "a" "t" "t" "g" "t" "c" "c" "a" "a" "g"
## [20] "a"
extremos(sarscovid[[1]])
##  [1] "a" "t" "t" "a" "a" "a" "g" "g" "t" "t" "a" "a" "a" "a" "a" "a" "a" "a" "a"
## [20] "a"
extremos(complementaria(sarscovid[[1]]))
##  [1] "t" "a" "a" "t" "t" "t" "c" "c" "a" "a" "t" "t" "t" "t" "t" "t" "t" "t" "t"
## [20] "t"
extremos(mers[[1]])
##  [1] "a" "t" "t" "t" "a" "a" "g" "t" "g" "a" "a" "a" "a" "a" "a" "a" "a" "a" "a"
## [20] "a"
extremos(complementaria(mers[[1]]))
##  [1] "t" "a" "a" "a" "t" "t" "c" "a" "c" "t" "t" "t" "t" "t" "t" "t" "t" "t" "t"
## [20] "t"
extremos(wuhan[[1]])
##  [1] "a" "t" "a" "t" "t" "a" "g" "g" "t" "t" "a" "a" "a" "a" "a" "a" "a" "a" "a"
## [20] "a"
extremos(complementaria(wuhan[[1]]))
##  [1] "t" "a" "t" "a" "a" "t" "c" "c" "a" "a" "t" "t" "t" "t" "t" "t" "t" "t" "t"
## [20] "t"
  1. Crear una gráfica de resumen para comparar la composición de nucleótidos de las 5 secuencias.
# Summary chart comparing the nucleotide composition of the five viruses.
#
# Labels and sequences are kept together in one named list so every count is
# paired with the right virus, instead of relying on two hand-ordered vectors
# staying in sync.
secuencias <- list(
  "Dengue" = dengue[[1]],
  "Mers" = mers[[1]],
  "SARS-COV-2" = sarscovid[[1]],
  # NOTE(review): `wuhan` is read from the SARS1 FASTA file; confirm that
  # "wuHan" is the label intended for it.
  "wuHan" = wuhan[[1]],
  # Label was misspelled "Zica" in the plot.
  "Zika" = zika[[1]]
)
nombres_bases <- c("Adenina", "Citosina", "Guanina", "Timina")

# count(x, wordsize = 1) reports the tallies in the order a, c, g, t, which is
# the order of `nombres_bases`; as.vector() drops the table names.
frecuencias <- lapply(
  secuencias,
  function(secuencia) as.vector(count(secuencia, wordsize = 1))
)

# Long-format table: one row per (virus, nucleotide) pair.
tabla <- data.frame(
  virus = rep(names(secuencias), each = length(nombres_bases)),
  nucleotido = rep(nombres_bases, times = length(secuencias)),
  Frecuencia = unlist(frecuencias, use.names = FALSE)
)

# geom_col() takes bar heights directly from `Frecuencia`; "dodge" places the
# four nucleotide bars of each virus side by side.
ggplot(data = tabla) +
  geom_col(
    aes(x = virus, y = Frecuencia, fill = nucleotido),
    position = "dodge"
  )