#Inicialización de variables y carga de librerías
library(seqinr)
## Warning: package 'seqinr' was built under R version 3.5.3
library(ape)
##
## Attaching package: 'ape'
## The following objects are masked from 'package:seqinr':
##
## as.alignment, consensus
Las gráficas de características de una secuencia se realizarán con la proteína: Multifunctional 2-oxoglutarate metabolism enzyme.
# Read the protein from a local FASTA file as an amino-acid sequence.
# NOTE(review): the path is absolute and machine-specific; confirm it exists
# wherever this report is rendered. The file name suggests accession A0PVU7.
path <- "D:/OCTAVO CICLO/Bioinformatica/CLASE 2/data/A0PVU7.fasta.txt"
oxa <- seqinr::read.fasta(file = path, seqtype = "AA")
##
## A C D E F G H I K L M N P Q R S T V W Y
## 122 8 75 84 48 95 32 63 56 106 27 45 76 51 82 68 60 93 13 34
A continuación se muestran las gráficas de la frecuencia absoluta de los datos de la proteína utilizada.
##
## C W M H Y N F Q K T I S D P R E V G L A
## 8 13 27 32 34 45 48 51 56 60 63 68 75 76 82 84 93 95 106 122
A continuación se muestra la gráfica de la frecuencia relativa de la secuencia.
Se utilizó el banco SwissProt para importar la proteína y sus palabras clave, que son Acyltransferase y Mycobacterium ulcerans en humanos.
# Import from a remote database
library(seqinr)

# Select the "swissprot" bank as the one to query
seqinr::choosebank(bank = "swissprot")

# Run the query and store it under the list name "app2": entries with the
# keyword Acyltransferase restricted to the species Mycobacterium ulcerans
app2 <- query(
  listname = "app2",
  query = "K=Acyltransferase AND sp=Mycobacterium ulcerans"
)

# Summarise the components of the query result
summary(app2)
## Length Class Mode
## call 3 -none- call
## name 1 -none- character
## nelem 1 -none- numeric
## typelist 1 -none- character
## req 10 -none- list
## socket 1 sockconn numeric
Se utilizó la librería APE para importar listas múltiples de genes relacionados con Mycobacterium ulcerans en humanos.
# Import multiple sequences with APE
# GenBank accession numbers to download. They must not carry stray whitespace:
# read.GenBank() reuses them verbatim as the names of the returned list, so a
# trailing space would break exact lookups such as myco[["AF177766.1"]].
acs.num <- c(
  "AF177765.1", "AF177766.1", "AH009665.2", "AL160272.16", "CH471090.1"
)
library(ape)
# as.character = TRUE returns each sequence as a character vector of bases.
# TRUE is spelled out because the shorthand T can be reassigned.
myco <- ape::read.GenBank(acs.num, as.character = TRUE)
Analizando los datos que se obtuvieron con APE:
# Summary of the imported sequences: length, class and mode of each element
summary(myco)
## Length Class Mode
## AF177765.1 18974 -none- character
## AF177766.1 2576 -none- character
## AH009665.2 9679 -none- character
## AL160272.16 146978 -none- character
## CH471090.1 25070985 -none- character
# Names of the imported sequences
names(myco)
## [1] "AF177765.1" "AF177766.1 " "AH009665.2" "AL160272.16" "CH471090.1"
# Percentage of each nucleotide in sequence AF177765.1, to one decimal place.
# `[[` is used for exact-name extraction of the list element.
round(
  table(myco[["AF177765.1"]]) / length(myco[["AF177765.1"]]) * 100,
  digits = 1
)
##
## a c g t
## 30.4 19.0 19.3 31.3
##
## a c g t
## 30.4 19.0 19.3 31.3
##
## a c g t
## 5760 3607 3669 5938
##
## a c g t
## 735 535 537 769
##
## a c g n t
## 2808 1840 1870 212 2949
##
## a c g t
## 43340 28974 29081 45583
##
## a c g n t
## 6602494 5832829 5846425 194611 6594626
## png
## 2
Frecuencia relativa de los aminoácidos, que muestra los nucleótidos más representativos. Se utilizó el Z-score para que la escala de los datos mostrados se vea con mayor claridad.
##
## aaa aac aag aat aca acc acg act
## 1.2171067 0.9025967 1.2334085 0.9229322 1.1372718 0.9849239 0.1511462 0.9893602
## aga agc agg agt ata atc atg att
## 1.3398898 1.1099799 1.4209117 0.9066513 0.7803803 0.7821491 1.1878279 0.9183124
## caa cac cag cat cca ccc ccg cct
## 1.0710814 1.1290591 1.6720548 1.1206912 1.4317430 1.3119620 0.1961087 1.4400898
## cga cgc cgg cgt cta ctc ctg ctt
## 0.1369762 0.1131397 0.2002100 0.2015961 0.8025784 1.3375591 1.5715335 1.2852656
## gaa gac gag gat gca gcc gcg gct
## 1.3014383 0.8596440 1.3048241 0.7631938 1.1572130 1.1464818 0.2150404 1.1591778
## gga ggc ggg ggt gta gtc gtg gtt
## 1.3233982 1.2605815 1.3486280 0.9098700 0.7029416 0.8338749 1.2296757 0.7737137
## taa tac tag tat tca tcc tcg tct
## 0.7785527 0.6916766 0.7402406 0.8367634 1.1907344 1.2862938 0.1282884 1.2965896
## tga tgc tgg tgt tta ttc ttg ttt
## 1.1562673 1.1866682 1.4593954 1.1577874 0.7906705 1.2229840 1.0353291 1.2433153
##
## aa ac ag at ca cc cg
## 4.111186 -4.260692 5.878549 -4.136356 7.071825 5.587602 -22.055523
## ct ga gc gg gt ta tc
## 6.850786 1.683642 -0.699931 6.177692 -5.023685 -9.647782 1.225455
## tg tt
## 7.489024 3.882597
En las gráficas se muestra el porcentaje de GC en diferentes rangos de valores de la lista de genes múltiples importados remotamente con la librería APE.
## $Compo
##
## * A C D E F G H I K L M N P Q R S T V W
## 0 122 8 75 84 48 95 32 63 56 106 27 45 76 51 82 68 60 93 13
## Y
## 34
##
## $Prop
## $Prop$Tiny
## [1] 0.2851373
##
## $Prop$Small
## [1] 0.5185784
##
## $Prop$Aliphatic
## [1] 0.2116317
##
## $Prop$Aromatic
## [1] 0.1025848
##
## $Prop$Non.polar
## [1] 0.5533118
##
## $Prop$Polar
## [1] 0.4466882
##
## $Prop$Charged
## [1] 0.2657512
##
## $Prop$Basic
## [1] 0.1373183
##
## $Prop$Acidic
## [1] 0.128433
##
##
## $Pi
## [1] 5.911408
Se observa la relación entre los aminoácidos de las secuencias y los valores de hidrofobicidad en diferentes escalas.
## Name Three.Letter.Code One.Letter.Code
## 1 Alanine Ala A
## 2 Arginine Arg R
## 3 Asparagine Asn N
## 4 Aspartic Acid Asp D
## 5 Cysteine Cys C
## 6 Glutamic Acid Glu E
## Residue.Type kdHydrophobicitya wwHydrophobicityb hhHydrophobicityc
## 1 Ile 4.5 0.31 -0.60
## 2 Val 4.2 -0.07 -0.31
## 3 Leu 3.8 0.56 -0.55
## 4 Phe 2.8 1.13 -0.32
## 5 Cys 2.5 0.24 -0.13
## 6 Met 1.9 0.23 -0.10
## mfHydrophobicityd ttHydrophobicitye
## 1 -1.56 1.97
## 2 -0.78 1.46
## 3 -1.81 1.82
## 4 -2.20 1.98
## 5 0.49 -0.30
## 6 -0.76 1.40
## [1] 1.8 -4.5 -3.5 -3.5
## three.L Name One.Letter.Code kdHydrophobicitya wwHydrophobicityb
## 1 Ala Alanine A 1.8 -0.17
## 2 Arg Arginine R -4.5 -0.81
## 3 Asn Asparagine N -3.5 -0.42
## 4 Asp Aspartic Acid D -3.5 -1.23
## 5 Cys Cysteine C 2.5 0.24
## 6 Gln Glutamine Q -3.5 -0.58
## hhHydrophobicityc mfHydrophobicityd ttHydrophobicitye Hydro
## 1 0.11 0.00 0.38 0.424
## 2 2.58 3.71 -2.57 -0.318
## 3 2.05 3.47 -1.62 -0.004
## 4 3.49 2.95 -3.27 -0.312
## 5 -0.13 0.49 -0.30 0.560
## 6 2.36 3.01 -1.84 -0.110
## [1] "M" "A" "N" "I" "S" "S" "P" "F" "G" "Q" "N" "E" "W" "L" "V" "E" "E" "M"
## [19] "Y" "R" "K" "F" "R" "D" "D" "P" "S" "S" "V" "D" "P" "S" "W" "H" "E" "F"
## [37] "L" "V" "D" "Y" "N" "P" "E" "S" "T" "Q" "E" "A" "T" "E" "P" "A" "V" "V"
## [55] "K" "P" "A" "A" "A" "P" "A" "K" "P" "A" "P" "A" "P" "A" "P" "A" "K" "P"
## [73] "A" "A" "G" "P" "P" "A" "A" "G" "N" "G" "S" "P" "A" "A" "A" "P" "S" "A"
## [91] "K" "P" "A" "A" "A" "P" "A" "K" "A" "P" "A" "P" "P" "P" "A" "E" "G" "D"
## [109] "E" "M" "Q" "V" "L" "R" "G" "A" "A" "A" "A" "V" "V" "K" "N" "M" "S" "A"
## [127] "S" "L" "D" "V" "P" "T" "A" "T" "S" "V" "R" "A" "V" "P" "A" "K" "L" "L"
## [145] "I" "D" "N" "R" "I" "V" "I" "N" "N" "Q" "L" "K" "R" "N" "R" "G" "G" "K"
## [163] "I" "S" "F" "T" "H" "L" "L" "G" "Y" "A" "L" "V" "Q" "A" "V" "K" "K" "F"
## [181] "P" "N" "M" "N" "R" "H" "Y" "L" "D" "V" "D" "G" "K" "P" "N" "A" "V" "T"
## [199] "P" "A" "H" "T" "N" "L" "G" "L" "A" "I" "D" "L" "Q" "G" "K" "D" "G" "K"
## [217] "R" "A" "L" "V" "V" "A" "G" "I" "K" "R" "C" "E" "T" "M" "R" "F" "A" "Q"
## [235] "F" "V" "T" "A" "Y" "E" "D" "I" "V" "R" "R" "A" "R" "D" "G" "K" "L" "T"
## [253] "A" "E" "D" "F" "S" "G" "V" "T" "I" "S" "L" "T" "N" "P" "G" "T" "I" "G"
## [271] "T" "V" "H" "S" "V" "P" "R" "L" "M" "A" "G" "Q" "G" "A" "I" "I" "G" "V"
## [289] "G" "A" "M" "E" "Y" "P" "A" "E" "F" "Q" "G" "A" "S" "E" "E" "R" "I" "A"
## [307] "E" "L" "G" "I" "G" "K" "L" "I" "T" "L" "T" "S" "T" "Y" "D" "H" "R" "I"
## [325] "I" "Q" "G" "A" "E" "S" "G" "D" "F" "L" "R" "T" "I" "H" "Q" "M" "L" "L"
## [343] "A" "D" "E" "F" "W" "D" "E" "I" "F" "R" "E" "L" "S" "I" "P" "Y" "L" "P"
## [361] "V" "R" "W" "R" "P" "D" "N" "P" "D" "S" "I" "V" "D" "K" "N" "A" "R" "I"
## [379] "I" "E" "L" "I" "A" "A" "Y" "R" "N" "R" "G" "H" "L" "M" "A" "D" "I" "D"
## [397] "P" "L" "R" "L" "D" "K" "T" "R" "F" "R" "S" "H" "P" "D" "L" "D" "V" "C"
## [415] "T" "H" "G" "L" "T" "L" "W" "D" "L" "D" "R" "S" "F" "K" "V" "G" "G" "C"
## [433] "F" "A" "G" "P" "Q" "N" "M" "K" "L" "R" "D" "V" "L" "S" "I" "L" "R" "D"
## [451] "T" "Y" "C" "R" "H" "V" "G" "V" "E" "Y" "T" "H" "I" "L" "E" "P" "E" "Q"
## [469] "Q" "Q" "W" "L" "Q" "Q" "R" "V" "E" "A" "K" "H" "V" "K" "P" "T" "V" "A"
## [487] "Q" "Q" "K" "Y" "V" "L" "S" "K" "L" "N" "A" "A" "E" "A" "F" "E" "T" "F"
## [505] "L" "Q" "T" "K" "Y" "V" "G" "Q" "K" "R" "F" "S" "L" "E" "G" "A" "E" "S"
## [523] "V" "I" "P" "M" "M" "D" "A" "A" "I" "D" "Q" "C" "A" "E" "Y" "G" "L" "D"
## [541] "E" "V" "V" "I" "G" "M" "P" "H" "R" "G" "R" "L" "N" "V" "L" "A" "N" "I"
## [559] "V" "G" "K" "P" "Y" "S" "Q" "I" "F" "S" "E" "F" "E" "G" "N" "L" "N" "P"
## [577] "S" "Q" "A" "H" "G" "S" "G" "D" "V" "K" "Y" "H" "L" "G" "A" "T" "G" "V"
## [595] "Y" "L" "Q" "M" "F" "G" "D" "N" "D" "I" "Q" "V" "S" "L" "T" "A" "N" "P"
## [613] "S" "H" "L" "E" "A" "V" "D" "P" "V" "L" "E" "G" "L" "V" "R" "A" "K" "Q"
## [631] "D" "L" "L" "E" "H" "G" "E" "T" "D" "T" "E" "N" "Q" "R" "A" "F" "S" "V"
## [649] "V" "P" "M" "M" "L" "H" "G" "D" "A" "A" "F" "A" "G" "Q" "G" "V" "V" "A"
## [667] "E" "T" "L" "N" "L" "A" "N" "L" "P" "G" "Y" "R" "V" "G" "G" "T" "I" "H"
## [685] "I" "I" "V" "N" "N" "Q" "I" "G" "F" "T" "T" "A" "P" "E" "Y" "S" "R" "S"
## [703] "T" "E" "Y" "C" "T" "D" "V" "A" "K" "T" "I" "G" "A" "P" "I" "F" "H" "V"
## [721] "N" "G" "D" "D" "P" "E" "A" "C" "V" "W" "V" "A" "R" "L" "A" "V" "D" "F"
## [739] "R" "Q" "R" "F" "N" "K" "D" "V" "I" "I" "D" "M" "L" "C" "Y" "R" "R" "R"
## [757] "G" "H" "N" "E" "G" "D" "D" "P" "S" "M" "T" "N" "P" "R" "M" "Y" "D" "V"
## [775] "V" "D" "T" "K" "R" "G" "V" "R" "K" "S" "Y" "T" "E" "A" "L" "I" "G" "R"
## [793] "G" "D" "I" "S" "I" "K" "E" "A" "E" "D" "A" "L" "R" "D" "Y" "Q" "G" "Q"
## [811] "L" "E" "Q" "V" "F" "N" "E" "V" "R" "E" "L" "E" "K" "H" "G" "A" "Q" "P"
## [829] "S" "E" "S" "V" "E" "S" "D" "Q" "M" "I" "P" "A" "G" "L" "A" "T" "A" "V"
## [847] "D" "K" "S" "L" "L" "A" "R" "I" "G" "D" "A" "F" "L" "A" "V" "P" "D" "G"
## [865] "F" "T" "T" "H" "P" "R" "V" "Q" "P" "V" "L" "E" "K" "R" "R" "E" "M" "A"
## [883] "Y" "E" "G" "K" "I" "D" "W" "A" "F" "A" "E" "L" "L" "A" "L" "G" "S" "L"
## [901] "V" "A" "E" "G" "K" "L" "V" "R" "F" "S" "G" "Q" "D" "T" "R" "R" "G" "T"
## [919] "F" "S" "Q" "R" "H" "S" "V" "I" "I" "D" "R" "H" "T" "R" "E" "E" "F" "T"
## [937] "P" "L" "Q" "L" "L" "T" "T" "N" "K" "D" "G" "S" "P" "T" "G" "G" "K" "F"
## [955] "L" "V" "Y" "D" "S" "P" "L" "S" "E" "Y" "A" "A" "V" "G" "F" "E" "Y" "G"
## [973] "Y" "T" "V" "G" "N" "P" "D" "A" "V" "V" "L" "W" "E" "A" "Q" "F" "G" "D"
## [991] "F" "V" "N" "G" "A" "Q" "S" "I" "I" "D" "E" "F" "I" "S" "S" "G" "E" "A"
## [1009] "K" "W" "G" "Q" "L" "S" "N" "V" "V" "L" "L" "L" "P" "H" "G" "H" "E" "G"
## [1027] "Q" "G" "P" "D" "H" "T" "S" "G" "R" "I" "E" "R" "F" "L" "Q" "L" "W" "A"
## [1045] "E" "G" "S" "M" "T" "I" "A" "M" "P" "S" "T" "P" "S" "N" "Y" "F" "H" "L"
## [1063] "L" "R" "R" "H" "A" "L" "D" "G" "I" "Q" "R" "P" "L" "I" "V" "F" "T" "P"
## [1081] "K" "S" "M" "L" "R" "N" "K" "A" "A" "V" "S" "D" "I" "K" "D" "F" "T" "E"
## [1099] "I" "K" "F" "R" "S" "V" "L" "E" "E" "P" "T" "Y" "E" "D" "G" "V" "G" "D"
## [1117] "R" "N" "L" "V" "N" "R" "I" "L" "L" "T" "S" "G" "K" "I" "Y" "Y" "E" "M"
## [1135] "V" "A" "R" "K" "A" "K" "D" "K" "R" "E" "D" "V" "A" "I" "V" "R" "V" "E"
## [1153] "Q" "L" "A" "P" "L" "P" "R" "R" "R" "L" "R" "E" "T" "L" "D" "R" "Y" "P"
## [1171] "N" "A" "K" "E" "F" "F" "W" "V" "Q" "E" "E" "P" "A" "N" "Q" "G" "A" "W"
## [1189] "P" "R" "F" "G" "L" "E" "L" "P" "E" "L" "L" "P" "E" "K" "L" "S" "G" "V"
## [1207] "K" "R" "I" "S" "R" "R" "A" "M" "S" "A" "P" "S" "S" "G" "S" "S" "K" "V"
## [1225] "H" "A" "V" "E" "Q" "Q" "E" "I" "L" "D" "T" "A" "F" "G"
Se observan señales con distintos picos a lo largo de la longitud de la secuencia, que relacionan la hidropatía con su posición en diferentes escalas.
A través de librerías como Bioconductor, BiocManager y GenomeGraphs podemos visualizar data genómica mediante regiones pintadas del cromosoma 6 que hacen referencia a proteínas codificantes y no codificantes en las mismas zonas de la secuencia de ADN. En este caso usamos la data de Homo sapiens y AGPAT4 1-acylglycerol-3-phosphate O-acyltransferase 4 (ENSG00000026652).
## Loading required package: biomaRt
##
## Attaching package: 'biomaRt'
## The following object is masked from 'package:seqinr':
##
## getSequence
## Loading required package: grid