#Inicialización de variables y carga de librerías

Carga de paquetes

library(seqinr)
## Warning: package 'seqinr' was built under R version 3.5.3
library(ape)
## 
## Attaching package: 'ape'
## The following objects are masked from 'package:seqinr':
## 
##     as.alignment, consensus

Selección de secuencia

Las gráficas de características de la secuencia se realizarán para la proteína: Multifunctional 2-oxoglutarate metabolism enzyme.

# Absolute location of the local FASTA file to analyse.
# NOTE(review): this path is machine-specific; the script only runs where
# this exact file exists.
path <- "D:/OCTAVO CICLO/Bioinformatica/CLASE 2/data/A0PVU7.fasta.txt"

# Read the file with seqinr, treating its contents as amino-acid ("AA") data.
oxa <- seqinr::read.fasta(file = path, seqtype = "AA")

Tabla de contingencia de valores

## 
##   A   C   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   T   V   W   Y 
## 122   8  75  84  48  95  32  63  56 106  27  45  76  51  82  68  60  93  13  34

Frecuencia absoluta

A continuación se muestran las gráficas de la frecuencia absoluta de los datos de la proteína utilizada.

## 
##   C   W   M   H   Y   N   F   Q   K   T   I   S   D   P   R   E   V   G   L   A 
##   8  13  27  32  34  45  48  51  56  60  63  68  75  76  82  84  93  95 106 122

Frecuencia relativa

A continuación se muestra la gráfica de la frecuencia relativa de la secuencia.

Utilizando SEQINR

Se utilizó el banco Swissprot para importar la proteína mediante sus palabras clave, que son Acyltransferase y Mycobacterium ulcerans en humanos.

# Import from a remote database
# NOTE(review): no closebank() call is visible in this section; confirm the
# bank connection opened below is closed once all queries are finished.
library(seqinr)
seqinr::choosebank("swissprot") # Select the swissprot bank for the queries that follow
app2 <- query("app2","K=Acyltransferase AND sp=Mycobacterium ulcerans") # Search by keyword (K=) and species (sp=); the result is stored as app2
summary(app2) # Summary of the query result object
##          Length Class    Mode     
## call      3     -none-   call     
## name      1     -none-   character
## nelem     1     -none-   numeric  
## typelist  1     -none-   character
## req      10     -none-   list     
## socket    1     sockconn numeric

Utilizando APE

Se utilizó la librería APE para importar listas múltiples de genes relacionados con Mycobacterium ulcerans en humanos.

# Import several GenBank records at once using APE.

# GenBank accession numbers to download. Each entry must be the bare
# accession with no surrounding whitespace: the strings are used verbatim as
# the names of the result, so a stray space ends up in names(myco) and breaks
# exact lookups such as myco[["AF177766.1"]].
acs.num <- c(
  "AF177765.1", "AF177766.1", "AH009665.2", "AL160272.16", "CH471090.1"
)

library(ape)
# as.character = TRUE makes every sequence come back as a plain character
# vector (one symbol per element). TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
myco <- ape::read.GenBank(acs.num, as.character = TRUE)

Analizando los datos que se obtuvieron con APE:

# Overview of the downloaded records: length, class and mode of each one.
summary(myco)
##             Length   Class  Mode     
## AF177765.1     18974 -none- character
## AF177766.1      2576 -none- character
## AH009665.2      9679 -none- character
## AL160272.16   146978 -none- character
## CH471090.1  25070985 -none- character
# Names attached to the sequences.
names(myco)
## [1] "AF177765.1"  "AF177766.1 " "AH009665.2"  "AL160272.16" "CH471090.1"
# Percentage of each symbol in record AF177765.1, rounded to one decimal.
round(
  table(myco[["AF177765.1"]]) / length(myco[["AF177765.1"]]) * 100,
  digits = 1
)
## 
##    a    c    g    t 
## 30.4 19.0 19.3 31.3

Generar PDF con tablas de valores de las 5 secuencias.

## 
##    a    c    g    t 
## 30.4 19.0 19.3 31.3
## 
##    a    c    g    t 
## 5760 3607 3669 5938
## 
##   a   c   g   t 
## 735 535 537 769
## 
##    a    c    g    n    t 
## 2808 1840 1870  212 2949
## 
##     a     c     g     t 
## 43340 28974 29081 45583
## 
##       a       c       g       n       t 
## 6602494 5832829 5846425  194611 6594626
## png 
##   2

Análisis estadístico

Análisis RHO

Frecuencia relativa de las palabras de nucleótidos (estadístico rho), que muestra cuáles son las más representativas. Se utilizó el Z score para que se vea más clara la escala de los datos mostrados.

## 
##       aaa       aac       aag       aat       aca       acc       acg       act 
## 1.2171067 0.9025967 1.2334085 0.9229322 1.1372718 0.9849239 0.1511462 0.9893602 
##       aga       agc       agg       agt       ata       atc       atg       att 
## 1.3398898 1.1099799 1.4209117 0.9066513 0.7803803 0.7821491 1.1878279 0.9183124 
##       caa       cac       cag       cat       cca       ccc       ccg       cct 
## 1.0710814 1.1290591 1.6720548 1.1206912 1.4317430 1.3119620 0.1961087 1.4400898 
##       cga       cgc       cgg       cgt       cta       ctc       ctg       ctt 
## 0.1369762 0.1131397 0.2002100 0.2015961 0.8025784 1.3375591 1.5715335 1.2852656 
##       gaa       gac       gag       gat       gca       gcc       gcg       gct 
## 1.3014383 0.8596440 1.3048241 0.7631938 1.1572130 1.1464818 0.2150404 1.1591778 
##       gga       ggc       ggg       ggt       gta       gtc       gtg       gtt 
## 1.3233982 1.2605815 1.3486280 0.9098700 0.7029416 0.8338749 1.2296757 0.7737137 
##       taa       tac       tag       tat       tca       tcc       tcg       tct 
## 0.7785527 0.6916766 0.7402406 0.8367634 1.1907344 1.2862938 0.1282884 1.2965896 
##       tga       tgc       tgg       tgt       tta       ttc       ttg       ttt 
## 1.1562673 1.1866682 1.4593954 1.1577874 0.7906705 1.2229840 1.0353291 1.2433153

## 
##         aa         ac         ag         at         ca         cc         cg 
##   4.111186  -4.260692   5.878549  -4.136356   7.071825   5.587602 -22.055523 
##         ct         ga         gc         gg         gt         ta         tc 
##   6.850786   1.683642  -0.699931   6.177692  -5.023685  -9.647782   1.225455 
##         tg         tt 
##   7.489024   3.882597

Análisis: Descubriendo GC (islas CpG)

En las gráficas se muestra el porcentaje de GC en diferentes rangos de valores de la lista de genes múltiples importados remotamente con la librería APE.

Estadística de la proteína

## $Compo
## 
##   *   A   C   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   T   V   W 
##   0 122   8  75  84  48  95  32  63  56 106  27  45  76  51  82  68  60  93  13 
##   Y 
##  34 
## 
## $Prop
## $Prop$Tiny
## [1] 0.2851373
## 
## $Prop$Small
## [1] 0.5185784
## 
## $Prop$Aliphatic
## [1] 0.2116317
## 
## $Prop$Aromatic
## [1] 0.1025848
## 
## $Prop$Non.polar
## [1] 0.5533118
## 
## $Prop$Polar
## [1] 0.4466882
## 
## $Prop$Charged
## [1] 0.2657512
## 
## $Prop$Basic
## [1] 0.1373183
## 
## $Prop$Acidic
## [1] 0.128433
## 
## 
## $Pi
## [1] 5.911408

Tablas de Hidrofobicidad

Se observa la relación entre los aminoácidos de las secuencias y los valores de hidrofobicidad en diferentes escalas.

##            Name Three.Letter.Code One.Letter.Code
## 1       Alanine               Ala               A
## 2      Arginine               Arg               R
## 3    Asparagine               Asn               N
## 4 Aspartic Acid               Asp               D
## 5      Cysteine               Cys               C
## 6 Glutamic Acid               Glu               E
##   Residue.Type kdHydrophobicitya wwHydrophobicityb hhHydrophobicityc
## 1          Ile               4.5              0.31             -0.60
## 2          Val               4.2             -0.07             -0.31
## 3          Leu               3.8              0.56             -0.55
## 4          Phe               2.8              1.13             -0.32
## 5          Cys               2.5              0.24             -0.13
## 6          Met               1.9              0.23             -0.10
##   mfHydrophobicityd ttHydrophobicitye
## 1             -1.56              1.97
## 2             -0.78              1.46
## 3             -1.81              1.82
## 4             -2.20              1.98
## 5              0.49             -0.30
## 6             -0.76              1.40
## [1]  1.8 -4.5 -3.5 -3.5
##   three.L          Name One.Letter.Code kdHydrophobicitya wwHydrophobicityb
## 1     Ala       Alanine               A               1.8             -0.17
## 2     Arg      Arginine               R              -4.5             -0.81
## 3     Asn    Asparagine               N              -3.5             -0.42
## 4     Asp Aspartic Acid               D              -3.5             -1.23
## 5     Cys      Cysteine               C               2.5              0.24
## 6     Gln     Glutamine               Q              -3.5             -0.58
##   hhHydrophobicityc mfHydrophobicityd ttHydrophobicitye  Hydro
## 1              0.11              0.00              0.38  0.424
## 2              2.58              3.71             -2.57 -0.318
## 3              2.05              3.47             -1.62 -0.004
## 4              3.49              2.95             -3.27 -0.312
## 5             -0.13              0.49             -0.30  0.560
## 6              2.36              3.01             -1.84 -0.110
##    [1] "M" "A" "N" "I" "S" "S" "P" "F" "G" "Q" "N" "E" "W" "L" "V" "E" "E" "M"
##   [19] "Y" "R" "K" "F" "R" "D" "D" "P" "S" "S" "V" "D" "P" "S" "W" "H" "E" "F"
##   [37] "L" "V" "D" "Y" "N" "P" "E" "S" "T" "Q" "E" "A" "T" "E" "P" "A" "V" "V"
##   [55] "K" "P" "A" "A" "A" "P" "A" "K" "P" "A" "P" "A" "P" "A" "P" "A" "K" "P"
##   [73] "A" "A" "G" "P" "P" "A" "A" "G" "N" "G" "S" "P" "A" "A" "A" "P" "S" "A"
##   [91] "K" "P" "A" "A" "A" "P" "A" "K" "A" "P" "A" "P" "P" "P" "A" "E" "G" "D"
##  [109] "E" "M" "Q" "V" "L" "R" "G" "A" "A" "A" "A" "V" "V" "K" "N" "M" "S" "A"
##  [127] "S" "L" "D" "V" "P" "T" "A" "T" "S" "V" "R" "A" "V" "P" "A" "K" "L" "L"
##  [145] "I" "D" "N" "R" "I" "V" "I" "N" "N" "Q" "L" "K" "R" "N" "R" "G" "G" "K"
##  [163] "I" "S" "F" "T" "H" "L" "L" "G" "Y" "A" "L" "V" "Q" "A" "V" "K" "K" "F"
##  [181] "P" "N" "M" "N" "R" "H" "Y" "L" "D" "V" "D" "G" "K" "P" "N" "A" "V" "T"
##  [199] "P" "A" "H" "T" "N" "L" "G" "L" "A" "I" "D" "L" "Q" "G" "K" "D" "G" "K"
##  [217] "R" "A" "L" "V" "V" "A" "G" "I" "K" "R" "C" "E" "T" "M" "R" "F" "A" "Q"
##  [235] "F" "V" "T" "A" "Y" "E" "D" "I" "V" "R" "R" "A" "R" "D" "G" "K" "L" "T"
##  [253] "A" "E" "D" "F" "S" "G" "V" "T" "I" "S" "L" "T" "N" "P" "G" "T" "I" "G"
##  [271] "T" "V" "H" "S" "V" "P" "R" "L" "M" "A" "G" "Q" "G" "A" "I" "I" "G" "V"
##  [289] "G" "A" "M" "E" "Y" "P" "A" "E" "F" "Q" "G" "A" "S" "E" "E" "R" "I" "A"
##  [307] "E" "L" "G" "I" "G" "K" "L" "I" "T" "L" "T" "S" "T" "Y" "D" "H" "R" "I"
##  [325] "I" "Q" "G" "A" "E" "S" "G" "D" "F" "L" "R" "T" "I" "H" "Q" "M" "L" "L"
##  [343] "A" "D" "E" "F" "W" "D" "E" "I" "F" "R" "E" "L" "S" "I" "P" "Y" "L" "P"
##  [361] "V" "R" "W" "R" "P" "D" "N" "P" "D" "S" "I" "V" "D" "K" "N" "A" "R" "I"
##  [379] "I" "E" "L" "I" "A" "A" "Y" "R" "N" "R" "G" "H" "L" "M" "A" "D" "I" "D"
##  [397] "P" "L" "R" "L" "D" "K" "T" "R" "F" "R" "S" "H" "P" "D" "L" "D" "V" "C"
##  [415] "T" "H" "G" "L" "T" "L" "W" "D" "L" "D" "R" "S" "F" "K" "V" "G" "G" "C"
##  [433] "F" "A" "G" "P" "Q" "N" "M" "K" "L" "R" "D" "V" "L" "S" "I" "L" "R" "D"
##  [451] "T" "Y" "C" "R" "H" "V" "G" "V" "E" "Y" "T" "H" "I" "L" "E" "P" "E" "Q"
##  [469] "Q" "Q" "W" "L" "Q" "Q" "R" "V" "E" "A" "K" "H" "V" "K" "P" "T" "V" "A"
##  [487] "Q" "Q" "K" "Y" "V" "L" "S" "K" "L" "N" "A" "A" "E" "A" "F" "E" "T" "F"
##  [505] "L" "Q" "T" "K" "Y" "V" "G" "Q" "K" "R" "F" "S" "L" "E" "G" "A" "E" "S"
##  [523] "V" "I" "P" "M" "M" "D" "A" "A" "I" "D" "Q" "C" "A" "E" "Y" "G" "L" "D"
##  [541] "E" "V" "V" "I" "G" "M" "P" "H" "R" "G" "R" "L" "N" "V" "L" "A" "N" "I"
##  [559] "V" "G" "K" "P" "Y" "S" "Q" "I" "F" "S" "E" "F" "E" "G" "N" "L" "N" "P"
##  [577] "S" "Q" "A" "H" "G" "S" "G" "D" "V" "K" "Y" "H" "L" "G" "A" "T" "G" "V"
##  [595] "Y" "L" "Q" "M" "F" "G" "D" "N" "D" "I" "Q" "V" "S" "L" "T" "A" "N" "P"
##  [613] "S" "H" "L" "E" "A" "V" "D" "P" "V" "L" "E" "G" "L" "V" "R" "A" "K" "Q"
##  [631] "D" "L" "L" "E" "H" "G" "E" "T" "D" "T" "E" "N" "Q" "R" "A" "F" "S" "V"
##  [649] "V" "P" "M" "M" "L" "H" "G" "D" "A" "A" "F" "A" "G" "Q" "G" "V" "V" "A"
##  [667] "E" "T" "L" "N" "L" "A" "N" "L" "P" "G" "Y" "R" "V" "G" "G" "T" "I" "H"
##  [685] "I" "I" "V" "N" "N" "Q" "I" "G" "F" "T" "T" "A" "P" "E" "Y" "S" "R" "S"
##  [703] "T" "E" "Y" "C" "T" "D" "V" "A" "K" "T" "I" "G" "A" "P" "I" "F" "H" "V"
##  [721] "N" "G" "D" "D" "P" "E" "A" "C" "V" "W" "V" "A" "R" "L" "A" "V" "D" "F"
##  [739] "R" "Q" "R" "F" "N" "K" "D" "V" "I" "I" "D" "M" "L" "C" "Y" "R" "R" "R"
##  [757] "G" "H" "N" "E" "G" "D" "D" "P" "S" "M" "T" "N" "P" "R" "M" "Y" "D" "V"
##  [775] "V" "D" "T" "K" "R" "G" "V" "R" "K" "S" "Y" "T" "E" "A" "L" "I" "G" "R"
##  [793] "G" "D" "I" "S" "I" "K" "E" "A" "E" "D" "A" "L" "R" "D" "Y" "Q" "G" "Q"
##  [811] "L" "E" "Q" "V" "F" "N" "E" "V" "R" "E" "L" "E" "K" "H" "G" "A" "Q" "P"
##  [829] "S" "E" "S" "V" "E" "S" "D" "Q" "M" "I" "P" "A" "G" "L" "A" "T" "A" "V"
##  [847] "D" "K" "S" "L" "L" "A" "R" "I" "G" "D" "A" "F" "L" "A" "V" "P" "D" "G"
##  [865] "F" "T" "T" "H" "P" "R" "V" "Q" "P" "V" "L" "E" "K" "R" "R" "E" "M" "A"
##  [883] "Y" "E" "G" "K" "I" "D" "W" "A" "F" "A" "E" "L" "L" "A" "L" "G" "S" "L"
##  [901] "V" "A" "E" "G" "K" "L" "V" "R" "F" "S" "G" "Q" "D" "T" "R" "R" "G" "T"
##  [919] "F" "S" "Q" "R" "H" "S" "V" "I" "I" "D" "R" "H" "T" "R" "E" "E" "F" "T"
##  [937] "P" "L" "Q" "L" "L" "T" "T" "N" "K" "D" "G" "S" "P" "T" "G" "G" "K" "F"
##  [955] "L" "V" "Y" "D" "S" "P" "L" "S" "E" "Y" "A" "A" "V" "G" "F" "E" "Y" "G"
##  [973] "Y" "T" "V" "G" "N" "P" "D" "A" "V" "V" "L" "W" "E" "A" "Q" "F" "G" "D"
##  [991] "F" "V" "N" "G" "A" "Q" "S" "I" "I" "D" "E" "F" "I" "S" "S" "G" "E" "A"
## [1009] "K" "W" "G" "Q" "L" "S" "N" "V" "V" "L" "L" "L" "P" "H" "G" "H" "E" "G"
## [1027] "Q" "G" "P" "D" "H" "T" "S" "G" "R" "I" "E" "R" "F" "L" "Q" "L" "W" "A"
## [1045] "E" "G" "S" "M" "T" "I" "A" "M" "P" "S" "T" "P" "S" "N" "Y" "F" "H" "L"
## [1063] "L" "R" "R" "H" "A" "L" "D" "G" "I" "Q" "R" "P" "L" "I" "V" "F" "T" "P"
## [1081] "K" "S" "M" "L" "R" "N" "K" "A" "A" "V" "S" "D" "I" "K" "D" "F" "T" "E"
## [1099] "I" "K" "F" "R" "S" "V" "L" "E" "E" "P" "T" "Y" "E" "D" "G" "V" "G" "D"
## [1117] "R" "N" "L" "V" "N" "R" "I" "L" "L" "T" "S" "G" "K" "I" "Y" "Y" "E" "M"
## [1135] "V" "A" "R" "K" "A" "K" "D" "K" "R" "E" "D" "V" "A" "I" "V" "R" "V" "E"
## [1153] "Q" "L" "A" "P" "L" "P" "R" "R" "R" "L" "R" "E" "T" "L" "D" "R" "Y" "P"
## [1171] "N" "A" "K" "E" "F" "F" "W" "V" "Q" "E" "E" "P" "A" "N" "Q" "G" "A" "W"
## [1189] "P" "R" "F" "G" "L" "E" "L" "P" "E" "L" "L" "P" "E" "K" "L" "S" "G" "V"
## [1207] "K" "R" "I" "S" "R" "R" "A" "M" "S" "A" "P" "S" "S" "G" "S" "S" "K" "V"
## [1225] "H" "A" "V" "E" "Q" "Q" "E" "I" "L" "D" "T" "A" "F" "G"

Hidropatía de la secuencia

Se observan señales con distintos picos a lo largo de la secuencia, que relacionan la hidropatía con la posición en la secuencia a diferentes escalas.

Visualización de genes y transcriptos

A través de librerías como Bioconductor, BiocManager y GenomeGraphs podemos visualizar datos genómicos mediante regiones coloreadas del cromosoma 6, que señalan las zonas codificantes y no codificantes de proteínas dentro de la secuencia de ADN. En este caso usamos los datos de Homo sapiens y del gen AGPAT4, 1-acylglycerol-3-phosphate O-acyltransferase 4 (ENSG00000026652).

## Loading required package: biomaRt
## 
## Attaching package: 'biomaRt'
## The following object is masked from 'package:seqinr':
## 
##     getSequence
## Loading required package: grid