Importación y escritura.

RPub de esta clase

# Print the directory R is currently working in.
getwd()

## [1] "/home/alejandro/DeepenData/Taller_sept_oct_nov_dic_2020"

# Build a child path by hand: the "/" separator is part of the pasted string.
paste0(getwd(), "/hola")

## [1] "/home/alejandro/DeepenData/Taller_sept_oct_nov_dic_2020/hola"

# Same path, but file.path() inserts the separator between its arguments.
file.path(getwd(), "hola")

## [1] "/home/alejandro/DeepenData/Taller_sept_oct_nov_dic_2020/hola"

# Every extra argument becomes one more path component.
file.path(getwd(), "hola", "hola")

## [1] "/home/alejandro/DeepenData/Taller_sept_oct_nov_dic_2020/hola/hola"

Manipulando data local

mtcars:

    mpg: Miles/(US) gallon
    cyl: Number of cylinders
    disp: Displacement (cu.in.)
    hp: Gross horsepower
    drat: Rear axle ratio
    wt: Weight (1000 lbs)
    qsec: 1/4 mile time
    vs: V/S
    am: Transmission (0 = automatic, 1 = manual)
    gear: Number of forward gears
    carb: Number of carburetors

#install.packages("devtools", dependencies = TRUE)
#install.packages("curl", dependencies = TRUE)
#install.packages("tidyverse", dependencies = TRUE)
library(readr)
# Work on a copy of the built-in mtcars data frame and print it.
my.data <- mtcars
my.data

# Exporting data ----
# The destination is passed positionally: readr deprecated the `path =`
# argument name in favour of `file =`, and the positional form is accepted
# under either name.
write_csv(my.data, "mtcars.csv") # comma-separated values
write_tsv(my.data, "mtcars.tsv") # tab-separated values
write_delim(my.data, "mtcars.txt", delim = ";") # text with ";" as delimiter
# readr documents `delim` as a single character, so the same one-character
# delimiter is used to write this file here and to read it back below.
write_delim(my.data, "mtcars.hola", delim = "*") # text with "*" as delimiter

openxlsx::write.xlsx(my.data, file = "mtcars.xlsx") # Excel workbook

# (Re)importing data ----
mtcars.csv <- read_csv("mtcars.csv")
mtcars.tsv <- read_tsv("mtcars.tsv")
mtcars.txt <- read_delim("mtcars.txt", delim = ";")
mtcars.hola <- read_delim("mtcars.hola", delim = "*")

library(readxl) # provides read_excel() for importing Excel workbooks
mtcars.xlsx <- read_excel("mtcars.xlsx")

Manipulando columna rownames

library(tibble)
# Move the row names of my.data into a regular column named "Car brand".
my.data.2 <- rownames_to_column(my.data, var = "Car brand")
library(magrittr) # provides the %>% pipe
# Same operation, written as a pipe.
my.data.2 <- my.data %>% rownames_to_column(var = "Car brand")

# Writing, with pipes ----
my.data.2 %>% write_csv("mtcars.csv")
my.data.2 %>% write_tsv("mtcars.tsv")
my.data.2 %>% write_delim("mtcars.txt", delim = ";")
# readr documents `delim` as a single character, so one and the same
# one-character delimiter is used for writing here and for reading below.
my.data.2 %>% write_delim("mtcars.hola", delim = "*")
my.data.2 %>% openxlsx::write.xlsx("mtcars.xlsx")

# Reading ----
# skip = 0 means no leading lines are skipped before the header.
mtcars.csv <- read_csv("mtcars.csv", skip = 0)
mtcars.tsv <- read_tsv("mtcars.tsv", skip = 0)
mtcars.txt <- read_delim("mtcars.txt", delim = ";", skip = 0)
mtcars.hola <- read_delim("mtcars.hola", delim = "*", skip = 0)
mtcars.xlsx <- read_excel("mtcars.xlsx", skip = 0)

########Chequear igualdad###############
library(dplyr)
# The data read from the CSV should equal the data read from the TSV.
# NOTE(review): all_equal() is reported as deprecated in recent dplyr
# releases - confirm against the installed version before reusing this.
all_equal(mtcars.csv, mtcars.tsv)

## [1] TRUE

# The data read from the .txt should equal the data read from the .hola file.
all_equal(mtcars.txt, mtcars.hola)

## [1] TRUE

# The data read from the .hola file should equal the data read from Excel.
all_equal(mtcars.hola, mtcars.xlsx)

## [1] TRUE

# Count how many distinct objects remain among the five imports.
# TODO: why does this give two results? Is one a table and the other console output?
length(unique(list(mtcars.csv, mtcars.tsv, mtcars.txt, mtcars.hola, mtcars.xlsx)))

## [1] 2

Descargando data online

library(utils)
library(httr)
# Works on both Linux and Windows:
my.github.url <- "https://github.com/DeepenData/Computational-Biology-and-Bioinformatics/raw/master/Wang2018_supplemental_DEGs.xlsx"
# Create the temporary .xlsx path up front instead of assigning `tf` inside
# the call argument; `tf` is read again after this block.
tf <- tempfile(fileext = ".xlsx")
github.response <- GET(my.github.url, write_disk(tf))
# Raise an error on an HTTP failure status, so a failed request is reported
# at the download step rather than when the file is read.
stop_for_status(github.response)
# Print the response summary.
github.response

## Response [https://raw.githubusercontent.com/DeepenData/Computational-Biology-and-Bioinformatics/master/Wang2018_supplemental_DEGs.xlsx]
##   Date: 2020-09-26 00:12
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 75.6 kB
## <ON DISK>  /tmp/RtmpLnqSq0/filecf511adf424.xlsx

# Read the downloaded workbook, skipping its first row, then print it.
hola.github <- tf %>% read_excel(skip = 1)
hola.github

Descargando data en línea

### Dropbox
my.dropbox.url <- "https://www.dropbox.com/s/j3kiivpcbghpb4v/log2FC.csv"
# Download once, with the method meant for the current platform ("libcurl" on
# Windows, "wget" otherwise), instead of running both calls unconditionally.
dropbox.method <- if (.Platform$OS.type == "windows") "libcurl" else "wget"
download.file(url = my.dropbox.url, destfile = "log2FC.csv", method = dropbox.method)
# Read the downloaded file and print it.
hola.dropbox <- read_csv("log2FC.csv")
hola.dropbox

library(utils)
library(httr)
### Arbitrary web site
my.supplementary.url <- "https://www.pnas.org/highwire/filestream/794560/field_highwire_adjunct_files/0/pnas.1800165115.sd01.xlsx"
# Linux: first attempt with the external wget downloader. This attempt was
# marked as failing for this URL, so an error in the download or in the
# subsequent read is reported as a message instead of aborting the script.
hola.supplementary <- tryCatch(
  {
    download.file(
      url = my.supplementary.url,
      destfile = "pnas.1800165115.sd01.xlsx",
      method = "wget"
    )
    read_excel("pnas.1800165115.sd01.xlsx")
  },
  error = function(e) {
    message("wget download failed: ", conditionMessage(e))
    NULL
  }
)
# Works on both Linux and Windows: fetch with httr into a temporary file.
# `tf` is created up front because it is read again after this block.
tf <- tempfile(fileext = ".xlsx")
supplementary.response <- GET(my.supplementary.url, write_disk(tf))
# Raise an error on an HTTP failure status, so a failed request is reported
# at the download step rather than when the file is read.
stop_for_status(supplementary.response)
# Print the response summary.
supplementary.response

## Response [https://www.pnas.org/content/pnas/suppl/2018/02/07/1800165115.DCSupplemental/pnas.1800165115.sd01.xlsx]
##   Date: 2020-09-26 00:12
##   Status: 200
##   Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
##   Size: 61.2 kB
## <ON DISK>  /tmp/RtmpLnqSq0/filecf525a4caf8.xlsx

# Read the downloaded workbook, skipping its first row, then print it.
hola.supplementary <- tf %>% read_excel(skip = 1)
hola.supplementary

# read_csv() is given a URL here, so the CSV is read without saving it first.
read_csv(file = "https://raw.githubusercontent.com/DeepenData/Computational-Biology-and-Bioinformatics/master/labels.csv")

Datos ómicos

Genómica. Tutoriales de biomartr:

- https://docs.ropensci.org/biomartr/articles/
- https://cran.r-project.org/web/packages/biomartr/vignettes/Sequence_Retrieval.html
- https://cran.r-project.org/web/packages/biomartr/readme/README.html

#install.packages("BiocManager", dependencies = TRUE)
#BiocManager::install()
#BiocManager::install("Biostrings")
#BiocManager::install("biomaRt")
#install.packages("biomartr")#, dependencies = TRUE)

library(biomartr)
library(magrittr)
# The result is an object of class DNAStringSet.
# getGenome() returns what read_genome() then loads into memory.
inmortal <- read_genome(
  getGenome(
    db = "refseq", # more databases are available
    organism = "Thermococcus gammatolerans",
    path = file.path("_ncbi_downloads", "genomes")
  )
)
inmortal

## DNAStringSet object of length 1:
##       width seq                                             names               
## [1] 2045438 GTTGATTACCCAATCTTCGCCT...GGAAGATGATATTATAAGGCAG NC_012804.1 Therm...

Dato impresionante: se pueden leer genomas directamente desde la URL del NCBI.

# Gzipped FASTA on the NCBI FTP server (file name: GRCh38.p9 "cds_from_genomic").
url.NCBI <- "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.35_GRCh38.p9/GCF_000001405.35_GRCh38.p9_cds_from_genomic.fna.gz"
# read_genome() is handed the URL directly.
human <- read_genome(url.NCBI)
# Show ten randomly chosen sequence names.
sample(names(human), 10)

##  [1] "lcl|NC_000003.12_cds_XP_016862545.1_23624 [gene=CCDC14] [db_xref=GeneID:64770] [protein=coiled-coil domain-containing protein 14 isoform X4] [protein_id=XP_016862545.1] [location=complement(join(123914779..123915718,123931102..123931234,123931308..123931526,123933673..123933755,123944849..123944990,123946803..123947319,123948691..123948785,123948896..123949132,123952595..123952610))]"                                                                                       
##  [2] "lcl|NC_000017.11_cds_XP_006721793.1_88054 [gene=TMEM92] [db_xref=GeneID:162461] [protein=transmembrane protein 92 isoform X1] [protein_id=XP_006721793.1] [location=join(50274502..50274570,50277715..50277740,50278556..50278630,50278801..50278954,50279195..50279308)]"                                                                                                                                                                                                                
##  [3] "lcl|NC_000010.11_cds_XP_005269439.1_58024 [gene=SMNDC1] [db_xref=GeneID:10285] [protein=survival of motor neuron-related-splicing factor 30 isoform X1] [protein_id=XP_005269439.1] [location=complement(join(110294150..110294287,110295228..110295381,110297567..110297728,110298648..110298790,110300547..110300549))]"                                                                                                                                                                
##  [4] "lcl|NC_000005.10_cds_NP_061746.1_33467 [gene=PCDHGB2] [db_xref=CCDS:CCDS54924.1,GeneID:56103] [protein=protocadherin gamma-B2 isoform 1 precursor] [protein_id=NP_061746.1] [location=join(141360136..141362556,141494807..141494865,141505393..141505481,141510947..141511173)]"                                                                                                                                                                                                         
##  [5] "lcl|NC_000022.11_cds_NP_060299.4_102607 [gene=CECR5] [db_xref=CCDS:CCDS13741.1,GeneID:27440] [protein=cat eye syndrome critical region protein 5 isoform 1] [protein_id=NP_060299.4] [location=complement(join(17138021..17138357,17138550..17138738,17141059..17141233,17143098..17143131,17145024..17145117,17148448..17148560,17149542..17149745,17165209..17165244))]"                                                                                                                
##  [6] "lcl|NC_000005.10_cds_NP_057728.1_32629 [gene=PRR16] [db_xref=CCDS:CCDS4127.1,GeneID:51334] [protein=protein Largen isoform 2] [protein_id=NP_057728.1] [location=join(120481204..120481293,120685954..120686709)]"                                                                                                                                                                                                                                                                        
##  [7] "lcl|NC_000003.12_cds_NP_001186908.1_24546 [gene=PLSCR2] [db_xref=CCDS:CCDS75029.1,GeneID:57047] [protein=phospholipid scramblase 2 isoform 2] [protein_id=NP_001186908.1] [location=complement(join(146441792..146441821,146449206..146449367,146454002..146454163,146455239..146455459,146458411..146458453,146459848..146460083,146495895..146495922))]"                                                                                                                                
##  [8] "lcl|NC_000004.12_cds_XP_016863462.1_28014 [gene=C4orf22] [db_xref=GeneID:255119] [protein=uncharacterized protein C4orf22 isoform X2] [protein_id=XP_016863462.1] [location=join(80321435..80321463,80335744..80335879,80362754..80362884,80583093..80583183,80869993..80870135,80963517..80963556)]"                                                                                                                                                                                     
##  [9] "lcl|NC_000008.11_cds_XP_016869227.1_45966 [gene=BMP1] [db_xref=GeneID:649] [protein=bone morphogenetic protein 1 isoform X2] [protein_id=XP_016869227.1] [location=join(22165406..22165553,22173602..22173715,22176143..22176313,22176533..22176650,22176961..22177139,22177852..22177957,22179705..22179829,22180368..22180483,22192049..22192151,22194058..22194174,22194445..22194590,22194724..22194919,22195462..22195587,22196680..22196840,22197240..22197420,22199264..22199310)]"
## [10] "lcl|NC_000015.10_cds_XP_016877777.1_77468 [gene=MYEF2] [db_xref=GeneID:50804] [protein=myelin expression factor 2 isoform X9] [protein_id=XP_016877777.1] [location=complement(join(48142908..48143071,48149032..48149083,48149163..48149371,48151100..48151171,48151473..48151571,48151874..48151942,48153792..48153893,48157993..48158056,48158175..48158224,48158769..48158922,48159613..48159804,48165933..48165977))]"

# Print the summary of the sequence set read above.
human

## DNAStringSet object of length 114967:
##           width seq                                         names               
##      [1]    918 ATGGTGACTGAATTCATTTT...ATTCTAGTGTAAAGTTTTAG lcl|NC_000001.11_...
##      [2]    402 ATGAGTGACAGCATCAACTT...GACCCAGGCACAGGCATTAG lcl|NC_000001.11_...
##      [3]    402 ATGAGTGACAGCATCAACTT...GACCCAGGCACAGGCATTAG lcl|NC_000001.11_...
##      [4]    402 ATGAGTGACAGCATCAACTT...GACCCAGGCACAGGCATTAG lcl|NC_000001.11_...
##      [5]    402 ATGAGTGACAGCATCAACTT...GACCCAGGCACAGGCATTAG lcl|NC_000001.11_...
##      ...    ... ...
## [114963]    297 ATGCCCCTCATTTACATAAA...TAAACCTACTCCAATGCTAA lcl|NC_012920.1_c...
## [114964]   1378 ATGCTAAAACTAATCGTCCC...CATTACCGGGTTTTCCTCTT lcl|NC_012920.1_c...
## [114965]   1812 ATAACCATGCACACTACTAT...CCCTACTCCTAATCACATAA lcl|NC_012920.1_c...
## [114966]    525 ATGATGTATGCTTTGTTTCT...AGATTGCTCGGGGGAATAGG lcl|NC_012920.1_c...
## [114967]   1141 ATGACCCCAATACGCAAAAC...CAAAATACTCAAATGGGCCT lcl|NC_012920.1_c...

Epigenómica

Para más información sobre GEOquery y consultas de bases de datos de expresión génica ver la siguiente clase y rpub:

https://rpubs.com/DeepenData/622645

#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("GEOquery")
library(GEOquery)

# Download the GEO dataset with accession GSE36278.
gset <- getGEO("GSE36278")

## Found 1 file(s)

## GSE36278_series_matrix.txt.gz

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   ID_REF = col_character()
## )

## See spec(...) for full column specifications.

## File stored at:

## /tmp/RtmpLnqSq0/GPL13534.soft

## Warning: 65 parsing failures.
##    row     col           expected     actual         file
## 485513 SPOT_ID 1/0/T/F/TRUE/FALSE rs10796216 literal data
## 485514 SPOT_ID 1/0/T/F/TRUE/FALSE rs715359   literal data
## 485515 SPOT_ID 1/0/T/F/TRUE/FALSE rs1040870  literal data
## 485516 SPOT_ID 1/0/T/F/TRUE/FALSE rs10936224 literal data
## 485517 SPOT_ID 1/0/T/F/TRUE/FALSE rs213028   literal data
## ...... ....... .................. .......... ............
## See problems(...) for more details.

# List the probes (feature data) of the first element of the set.
gset[[1]] %>% fData()

Transcriptómica

# Retrieve the RNA sequences of the same organism and load them.
# Note: this assigns to `inmortal` again, replacing any earlier value.
inmortal <- read_rna(
  getRNA(
    db = "refseq", # more databases are available
    organism = "Thermococcus gammatolerans",
    path = file.path("_ncbi_downloads", "transcriptome")
  )
)

inmortal

## DNAStringSet object of length 52:
##      width seq                                              names               
##  [1]    78 CCCGCGGTAGCCTAGCCTGGGAG...TCAAATCCGGGCCGCGGGACCA lcl|NC_012804.1_t...
##  [2]    77 GCCGGGGTGGTGTAGCCTGGTCA...TCAAATCCCGGCCCCGGCCCCA lcl|NC_012804.1_t...
##  [3]    78 GGGGGCGTGGTGTAGCCTGGTCC...TCAAATCCCCGCGCCCCCACCA lcl|NC_012804.1_t...
##  [4]    76 AGCCCCGTGGTGTAGCGGCCAAG...TCGAATCCCGGCGGGGCTACCA lcl|NC_012804.1_t...
##  [5]    77 GGGCCCGTGGTCTAGACGGTTAT...TCGAATCCCCGCGGGCCCACCA lcl|NC_012804.1_t...
##  ...   ... ...
## [48]    85 GCGGGGGTTGCCGAGCCTGGTCA...GGTTCAAATCCCCGCCCCCGCA lcl|NC_012804.1_t...
## [49]    87 GCCGGGATTGCCTAGCCTGGGAA...TCAAATCCCCGTCCCGGCGCCA lcl|NC_012804.1_t...
## [50]  2876 GCAACTAAGCCGCCTGGTGGATG...GTCCCCGAAAATAGGCGGCCAT lcl|NC_012804.1_r...
## [51]    77 GGGCCGGTAGCTCAGCCTGGGAG...TCGAATCCCGGCCGGTCCACCA lcl|NC_012804.1_t...
## [52]  1513 GTACTCCTTGCTCAATTCCGGTT...CGATCACCTCCTATCGCCGGAA lcl|NC_012804.1_r...

# Show ten randomly chosen sequence names.
sample(names(inmortal), 10)

##  [1] "lcl|NC_012804.1_trna_41 [locus_tag=TGAM_RS09680] [db_xref=GeneID:7987160] [product=tRNA-Ser] [location=complement(1835756..1835841)] [gbkey=tRNA]"         
##  [2] "lcl|NC_012804.1_trna_25 [locus_tag=TGAM_RS05685] [db_xref=GeneID:7986995] [product=tRNA-Met] [location=1066698..1066775] [gbkey=tRNA]"                     
##  [3] "lcl|NC_012804.1_trna_38 [locus_tag=TGAM_RS09660] [db_xref=GeneID:7988314] [product=tRNA-Leu] [location=complement(1834145..1834232)] [gbkey=tRNA]"         
##  [4] "lcl|NC_012804.1_rrna_52 [locus_tag=TGAM_RS10410] [db_xref=GeneID:7987171] [product=16S ribosomal RNA] [location=complement(1948874..1950386)] [gbkey=rRNA]"
##  [5] "lcl|NC_012804.1_trna_37 [locus_tag=TGAM_RS09290] [db_xref=GeneID:7987156] [product=tRNA-Ile] [location=1769527..1769604] [gbkey=tRNA]"                     
##  [6] "lcl|NC_012804.1_trna_33 [locus_tag=TGAM_RS09030] [db_xref=GeneID:7987614] [product=tRNA-Ala] [location=1724892..1724969] [gbkey=tRNA]"                     
##  [7] "lcl|NC_012804.1_trna_11 [locus_tag=TGAM_RS02060] [db_xref=GeneID:7987952] [product=tRNA-Ile] [location=complement(391152..391229)] [gbkey=tRNA]"           
##  [8] "lcl|NC_012804.1_trna_42 [locus_tag=TGAM_RS09765] [db_xref=GeneID:7988332] [product=tRNA-Gly] [location=1853611..1853688] [gbkey=tRNA]"                     
##  [9] "lcl|NC_012804.1_trna_51 [locus_tag=TGAM_RS10405] [db_xref=GeneID:7987170] [product=tRNA-Ala] [location=complement(1948755..1948831)] [gbkey=tRNA]"         
## [10] "lcl|NC_012804.1_trna_21 [locus_tag=TGAM_RS03500] [db_xref=GeneID:7987138] [product=tRNA-Glu] [location=644930..645007] [gbkey=tRNA]"

Proteómica

# Species whose proteomes are requested.
download_species <- c("Thermococcus gammatolerans", "Thermotoga maritima")
# Retrieve the proteomes of these two species from NCBI RefSeq.
mis.proteomas <- getProteomeSet(
  "refseq",
  organisms = download_species,
  path = "set_proteomes"
)

# Read the first element of the returned set.
mis.proteomas[1] %>% read_proteome()

## AAStringSet object of length 2137:
##        width seq                                            names               
##    [1]    51 MARNKPLAKKLRLAKAAKQNRR...TNRKVMTHPKRRHWRRTKLKE WP_010477181.1 MU...
##    [2]    91 MAEEHVVYIGKKPVMNYVLAVI...LPTADGRTANTSTIEIVLEKP WP_010477671.1 MU...
##    [3]    56 MAKADYNKRKPRKFGKGARRCM...LMLCRHCFREVAPKLGFKKYE WP_011250478.1 MU...
##    [4]   102 MQKARIKLASTNIKALNEVTDQ...MRQIMRIRVPEDVTIEIELIS WP_012571711.1 MU...
##    [5]   208 MKTWRRYEEYLLTGEWHVHTNY...EVKTVRGGDVHGVGEFKYFWG WP_012751161.1 PH...
##    ...   ... ...
## [2133]   267 MIGIIFDMDGVVYRGNRPIDGA...PDLVFPSIKELKDYLSTVLGD WP_169302034.1 HA...
## [2134]   246 MRVIFLDLDGTLLGDDYSPENA...KLSHPKAKHISSIEELLGVIP WP_169302035.1 ma...
## [2135]   108 MAQKPIGNSRAKQLPKVFNSEE...AYPKAGSAVAKYVSTIGRWVS WP_169302036.1 hy...
## [2136]   425 MILTRHDTTIQSLAKEYFLKSD...VQEWRDAAKAKYQKVLQEVNG WP_169302037.1 ex...
## [2137]   123 MEVNGRKVAGFALYFLGIALGI...GVVLTTLGLFVYKIGGREDAR WP_169302038.1 hy...

# Read the second element of the returned set.
mis.proteomas[2] %>% read_proteome()

## AAStringSet object of length 1808:
##        width seq                                            names               
##    [1]    72 MRKIFTAIIEYDPEKKQYVGMV...EAGDEINLQEFVALEMIEVET WP_004079900.1 MU...
##    [2]    77 MSYLPIVDPKTMEKVLLKLGFQ...RKIIREAGISVEEFKKVLENL WP_004079902.1 ty...
##    [3]    70 METQKEIVFIAVESEDGGYIEK...FDEGAPKYVHARFVKDVTIAV WP_004079908.1 MU...
##    [4]   580 MNLRSIQKILRFYSLIRKRFLV...TFRRIIETYVNESKRIADKDV WP_004079909.1 MU...
##    [5]   144 MEAAVVVAYSYFVLKLEFAISN...LLGHVLFKKIERKSRAEGELV WP_004079914.1 MU...
##    ...   ... ...
## [1804]   247 MGGGKMIKRVKTGIPGMDEILH...HPFEITDKGIVIYPSEGGEGR WP_162487497.1 Ka...
## [1805]   262 MGPVDIGLIQLLSAYIFVVVLM...VSVFLYLGYKAFFNRENQLIV WP_164924970.1 ir...
## [1806]   280 MKKILTIVRYILIAICLIFFLF...PVVVFALVAQRYLIRGLTSER WP_164924971.1 AB...
## [1807]   246 MLKWLDSNPSIQELKKFAKSLG...FPSGQGMMAQGIMWEIFRSGR WP_164924972.1 DU...
## [1808]   525 MTGRFLKIIIKKATENLLKHRD...NLDLEIYEGGQPHYPYLMLLQ WP_164924973.1 DA...

Proteómica: importando desde ProteomeXchange (R for Proteomics)

Estudios diferenciales

#BiocManager::install()
#BiocManager::install("RforProteomics",
#                     ask = F, 
#                     dependencies = TRUE,
#                     type = "source",
#                     checkBuilt = TRUE)

# Experiment information ----
library("rpx")
# Create a handle for the dataset with identifier PXD000001.
px1 <- PXDataset("PXD000001")
# List the files that belong to the dataset.
px1 %>% pxfiles()

##  [1] "F063721.dat"                                                         
##  [2] "F063721.dat-mztab.txt"                                               
##  [3] "PRIDE_Exp_Complete_Ac_22134.xml.gz"                                  
##  [4] "PRIDE_Exp_mzData_Ac_22134.xml.gz"                                    
##  [5] "PXD000001_mztab.txt"                                                 
##  [6] "README.txt"                                                          
##  [7] "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML" 
##  [8] "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzXML"
##  [9] "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzXML"         
## [10] "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw"           
## [11] "erwinia_carotovora.fasta"                                            
## [12] "generated"

Para instalar MSnbase:

Dentro de la terminal:

sudo apt-cache search libnetcdf 
sudo apt-get update 
sudo apt-get install libnetcdf-c++4-1

Usar la siguiente página omitiendo los pasos de instalación de R 4.0

Para_instalar

#BiocManager::install("MSnbase", ask = T, dependencies = TRUE,   type = "source", checkBuilt = TRUE)
library(magrittr)
library(Biobase)
library(MSnbase)

## Loading required package: mzR

## Loading required package: Rcpp

## Loading required package: S4Vectors

## Loading required package: stats4

## 
## Attaching package: 'S4Vectors'

## The following objects are masked from 'package:dplyr':
## 
##     first, rename

## The following object is masked from 'package:base':
## 
##     expand.grid

## Loading required package: ProtGenerics

## 
## Attaching package: 'ProtGenerics'

## The following object is masked from 'package:stats':
## 
##     smooth

## 
## This is MSnbase version 2.15.6 
##   Visit https://lgatto.github.io/MSnbase/ to get started.

## 
## Attaching package: 'MSnbase'

## The following object is masked from 'package:base':
## 
##     trimws

# Download the mzTab file of the dataset.
mztab <- px1 %>% pxget("PXD000001_mztab.txt")

## Downloading 1 file

## /home/alejandro/DeepenData/Taller_sept_oct_nov_dic_2020/PXD000001_mztab.txt already present.

# Read the "PEP" section of the mzTab file.
# NOTE(review): version = "0.9" triggers a deprecation warning when this
# runs; see ?readMzTabData before changing it.
qnt <- mztab %>% readMzTabData(what = "PEP", version = "0.9")

## Warning: Version 0.9 is deprecated. Please see '?readMzTabData' and '?MzTab' for
## details.

TMT: https://en.wikipedia.org/wiki/Tandem_mass_tag

# Label the samples with the TMT6 reporter names.
sampleNames(qnt) <- reporterNames(TMT6)
# head(exprs(qnt))
# Drop features with missing values, then show the processing log.
qnt <- qnt %>% filterNA()
qnt %>% processingData()

## - - - Processing information - - -
## mzTab read: Fri Sep 25 21:15:46 2020 
## Subset [2351,6][1504,6] Fri Sep 25 21:15:46 2020 
## Removed features with more than 0 NAs: Fri Sep 25 21:15:46 2020 
## Dropped featureData's levels Fri Sep 25 21:15:46 2020 
##  MSnbase version: 2.15.6

# Combine peptides into proteins:
# - group by the 'accession' feature metadata
# - sum the peptide intensities within each group
protqnt <- combineFeatures(
  qnt,
  groupBy = fData(qnt)$accession,
  fun = sum
)

# Show the last rows of the combined expression matrix.
tail(exprs(protqnt))

##          TMT6.126 TMT6.127  TMT6.128  TMT6.129 TMT6.130   TMT6.131
## ECA4517  371367.4   376656  439160.7  371917.9   337498   376772.2
## P00489  5086802.6  4970118 5563334.7 5471237.4  2778741  2905920.8
## P00761  3076535.9  3190179 3833943.6 3835475.1  2701778  3646836.3
## P00924  5484890.4  3269353 1850660.8 1062968.7  1596193  5199268.0
## P02769   837322.6  1486133 2796767.6 6210387.7  3511954  1126899.3
## P62894  5498893.0  7031520 6517750.7 6773717.8  7095527 14682351.1

library("RColorBrewer") ## Color palettes
library("ggplot2")  ## Convenient and nice plotting
library("reshape2") ## Flexibly reshape data

# Five colours from the "Set1" palette, one per plotted protein.
cls <- brewer.pal(n = 5, name = "Set1")
# Plot the last five rows of the expression matrix; it is transposed so that
# each protein becomes one line across the reporters.
matplot(
  t(tail(exprs(protqnt), n = 5)),
  type = "b",
  lty = 1,
  col = cls,
  ylab = "Protein intensity (summed peptides)",
  xlab = "TMT reporters"
)
# Legend with the names of the same five features, in the same colours.
legend(
  "topright",
  legend = tail(featureNames(protqnt), n = 5),
  lty = 1,
  bty = "n",
  cex = 0.8,
  col = cls
)

Primera clase - Taller DeepenData: R intermedio-avanzado

Acevedo A., Alejandro & Muñoz, Manuel. Universidad de Chile. Corresponding: manuel.munoz.g@ug.uchile.cl

24-09-2020