library(TCGAbiolinks)
library(SummarizedExperiment)
# Build a GDC query for the HTSeq FPKM gene-expression files of two
# TCGA-SARC primary-tumor cases, then download the files it matches.
query <- GDCquery(
  project = "TCGA-SARC",
  barcode = c("TCGA-WK-A8Y0", "TCGA-MJ-A68H"),
  sample.type = "Primary solid Tumor",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - FPKM"
)
GDCdownload(query = query)
##
| | 0%
|========================== | 50% ~0 s remaining
|====================================================|100% ~0 s remaining
|====================================================|100% Completed after 0 s
## [1] 60483 3
##
| | 0%
|========================== | 50% ~0 s remaining
|====================================================|100% ~0 s remaining
|====================================================|100% Completed after 0 s
## Starting to add information to samples
## => Add clinical information to samples
## Add FFPE information. More information at:
## => https://cancergenome.nih.gov/cancersselected/biospeccriteria
## => http://gdac.broadinstitute.org/runs/sampleReports/latest/FPPP_FFPE_Cases.html
## => Adding subtype information to samples
## sarc subtype information from:doi:10.1016/j.cell.2017.10.014
## Accessing www.ensembl.org to get gene information
## Downloading genome information (try:0) Using: Human genes (GRCh38.p13)
## Cache found
## From the 60483 genes we couldn't map 3971
## [1] 56512 2
## Accessing www.ensembl.org to get gene information
## Downloading genome information (try:0) Using: Human genes (GRCh38.p13)
## Cache found
# Number of gene IDs in data$X1 whose version-stripped form (trailing
# ".<digits>" removed) is absent from gene.information$ensembl_gene_id.
sum(!(gsub("\\.[0-9]*$", "", data$X1) %in% gene.information$ensembl_gene_id))
## [1] 3971
Examples of gene IDs not found in the database:
# Show, as a one-column data frame, the gene IDs in data$X1 whose
# version-stripped form is absent from gene.information$ensembl_gene_id.
data$X1[
  !(gsub("\\.[0-9]*$", "", data$X1) %in% gene.information$ensembl_gene_id)
] %>%
  as.data.frame()
# Load the GENCODE v22 annotation from the local GFF3 file, then tabulate
# how many IDs in data$X1 (version suffix kept) appear among its gene_id values.
annotation <- rtracklayer::import.gff3(
  con = "~/Downloads/gencode.v22.annotation.gff3.gz"
)
table((data$X1 %in% annotation$gene_id))
##
## FALSE TRUE
## 50 60433
## [1] "ENSGR0000264510.4" "ENSGR0000167393.15" "ENSGR0000223773.5"
## [4] "ENSGR0000223511.4" "ENSGR0000227159.6" "ENSGR0000226179.4"
## [7] "ENSGR0000277120.3" "ENSGR0000237801.4" "ENSGR0000002586.16"
## [10] "ENSGR0000228410.4" "ENSGR0000234958.4" "ENSGR0000236017.6"
## [13] "ENSGR0000265658.4" "ENSGR0000197976.9" "ENSGR0000198223.13"
## [16] "ENSGR0000185960.11" "ENSGR0000263835.4" "ENSGR0000230542.4"
## [19] "ENSGR0000281849.1" "ENSGR0000124333.13" "ENSGR0000124334.15"
## [22] "ENSGR0000169084.11" "ENSGR0000276543.3" "ENSGR0000178605.11"
## [25] "ENSGR0000225661.5" "ENSGR0000185203.10" "ENSGR0000196433.10"
## [28] "ENSGR0000280767.1" "ENSGR0000168939.9" "ENSGR0000169100.11"
## [31] "ENSGR0000237531.4" "ENSGR0000229232.4" "ENSGR0000182484.13"
## [34] "ENSGR0000214717.8" "ENSGR0000236871.5" "ENSGR0000182378.11"
## [37] "ENSGR0000185291.9" "ENSGR0000234622.4" "ENSGR0000169093.13"
## [40] "ENSGR0000205755.9" "ENSGR0000237040.4" "ENSGR0000264819.4"
## [43] "ENSGR0000223484.5" "ENSGR0000182162.8" "ENSGR0000228572.5"
## [46] "ENSGR0000275287.3" "ENSGR0000263980.4" "ENSGR0000270726.4"
## [49] "ENSGR0000223274.4" "ENSGR0000223571.4"
# Keep only the annotation records typed as protein coding, then count the
# distinct gene names among them.
annotation.protein.coding <- annotation[
  annotation$gene_type == "protein_coding"
]
sum(!duplicated(annotation.protein.coding$gene_name))
## [1] 19712
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 3.6.1 (2019-07-05)
## os Ubuntu 19.10
## system x86_64, linux-gnu
## ui X11
## language en_US
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2019-12-02
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## ! package * version date lib source
## annotate 1.64.0 2019-10-29 [1] Bioconductor
## AnnotationDbi 1.48.0 2019-10-29 [1] Bioconductor
## aroma.light 3.16.0 2019-10-29 [1] Bioconductor
## askpass 1.1 2019-01-13 [2] CRAN (R 3.6.1)
## assertthat 0.2.1 2019-03-21 [2] CRAN (R 3.6.1)
## backports 1.1.5 2019-10-02 [2] CRAN (R 3.6.1)
## base64enc 0.1-3 2015-07-28 [2] CRAN (R 3.6.1)
## Biobase * 2.46.0 2019-10-29 [1] Bioconductor
## BiocFileCache 1.10.0 2019-10-29 [1] Bioconductor
## BiocGenerics * 0.32.0 2019-10-29 [1] Bioconductor
## BiocParallel * 1.20.0 2019-10-30 [1] Bioconductor
## biomaRt 2.42.0 2019-10-29 [1] Bioconductor
## Biostrings 2.54.0 2019-10-29 [1] Bioconductor
## bit 1.1-14 2018-05-29 [2] CRAN (R 3.6.1)
## bit64 0.9-7 2017-05-08 [2] CRAN (R 3.6.1)
## bitops 1.0-6 2013-08-17 [2] CRAN (R 3.6.1)
## blob 1.2.0 2019-07-09 [2] CRAN (R 3.6.1)
## broom 0.5.2 2019-04-07 [2] CRAN (R 3.6.1)
## callr 3.3.2 2019-09-22 [2] CRAN (R 3.6.1)
## circlize 0.4.8 2019-09-08 [2] CRAN (R 3.6.1)
## cli 1.1.0 2019-03-19 [2] CRAN (R 3.6.1)
## clue 0.3-57 2019-02-25 [2] CRAN (R 3.6.1)
## cluster 2.1.0 2019-06-19 [4] CRAN (R 3.6.1)
## codetools 0.2-16 2018-12-24 [4] CRAN (R 3.6.0)
## colorspace 1.4-1 2019-03-18 [2] CRAN (R 3.6.1)
## ComplexHeatmap 2.2.0 2019-10-29 [2] Bioconductor
## ConsensusClusterPlus 1.50.0 2019-10-29 [1] Bioconductor
## crayon 1.3.4 2017-09-16 [2] CRAN (R 3.6.1)
## curl 4.3 2019-12-02 [2] CRAN (R 3.6.1)
## data.table 1.12.6 2019-10-18 [1] CRAN (R 3.6.1)
## DBI 1.0.0 2018-05-02 [2] CRAN (R 3.6.1)
## dbplyr 1.4.2 2019-06-17 [2] CRAN (R 3.6.1)
## DelayedArray * 0.12.0 2019-10-29 [1] Bioconductor
## desc 1.2.0 2018-05-01 [2] CRAN (R 3.6.1)
## DESeq 1.38.0 2019-10-29 [1] Bioconductor
## devtools 2.2.1 2019-09-24 [2] CRAN (R 3.6.1)
## digest 0.6.22 2019-10-21 [1] CRAN (R 3.6.1)
## doParallel 1.0.15 2019-08-02 [2] CRAN (R 3.6.1)
## downloader 0.4 2015-07-09 [2] CRAN (R 3.6.1)
## dplyr * 0.8.3 2019-07-04 [2] CRAN (R 3.6.1)
## EDASeq 2.20.0 2019-10-29 [1] Bioconductor
## edgeR 3.28.0 2019-10-29 [1] Bioconductor
## ellipsis 0.3.0 2019-09-20 [2] CRAN (R 3.6.1)
## evaluate 0.14 2019-05-28 [2] CRAN (R 3.6.1)
## foreach 1.4.7 2019-07-27 [2] CRAN (R 3.6.1)
## fs 1.3.1 2019-05-06 [2] CRAN (R 3.6.1)
## genefilter 1.68.0 2019-10-29 [1] Bioconductor
## geneplotter 1.64.0 2019-10-29 [1] Bioconductor
## generics 0.0.2 2018-11-29 [2] CRAN (R 3.6.1)
## GenomeInfoDb * 1.22.0 2019-10-29 [1] Bioconductor
## GenomeInfoDbData 1.2.2 2019-11-06 [1] Bioconductor
## GenomicAlignments 1.22.0 2019-10-29 [1] Bioconductor
## GenomicFeatures 1.38.0 2019-10-29 [1] Bioconductor
## GenomicRanges * 1.38.0 2019-10-29 [1] Bioconductor
## GetoptLong 0.1.7 2018-06-10 [2] CRAN (R 3.6.1)
## ggplot2 3.2.1 2019-08-10 [2] CRAN (R 3.6.1)
## ggpubr 0.2.4 2019-11-14 [2] CRAN (R 3.6.1)
## ggrepel 0.8.1 2019-05-07 [2] CRAN (R 3.6.1)
## ggsignif 0.6.0 2019-08-08 [2] CRAN (R 3.6.1)
## ggthemes 4.2.0 2019-05-13 [2] CRAN (R 3.6.1)
## GlobalOptions 0.1.1 2019-09-30 [2] CRAN (R 3.6.1)
## glue 1.3.1 2019-03-12 [2] CRAN (R 3.6.1)
## gridExtra 2.3 2017-09-09 [2] CRAN (R 3.6.1)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 3.6.1)
## hms 0.5.2 2019-10-30 [2] CRAN (R 3.6.1)
## htmltools 0.4.0 2019-10-04 [2] CRAN (R 3.6.1)
## httr 1.4.1 2019-08-05 [2] CRAN (R 3.6.1)
## hwriter 1.3.2 2014-09-10 [2] CRAN (R 3.6.1)
## IRanges * 2.20.0 2019-10-29 [1] Bioconductor
## iterators 1.0.12 2019-07-26 [2] CRAN (R 3.6.1)
## jsonlite 1.6 2018-12-07 [2] CRAN (R 3.6.1)
## km.ci 0.5-2 2009-08-30 [2] CRAN (R 3.6.1)
## KMsurv 0.1-5 2012-12-03 [2] CRAN (R 3.6.1)
## knitr 1.26 2019-11-12 [2] CRAN (R 3.6.1)
## lattice 0.20-38 2018-11-04 [4] CRAN (R 3.6.0)
## latticeExtra 0.6-28 2016-02-09 [2] CRAN (R 3.6.1)
## lazyeval 0.2.2 2019-03-15 [2] CRAN (R 3.6.1)
## lifecycle 0.1.0 2019-08-01 [2] CRAN (R 3.6.1)
## limma 3.42.0 2019-10-29 [1] Bioconductor
## locfit 1.5-9.1 2013-04-20 [2] CRAN (R 3.6.1)
## magrittr 1.5 2014-11-22 [2] CRAN (R 3.6.1)
## matlab 1.0.2 2014-06-24 [2] CRAN (R 3.6.1)
## Matrix 1.2-18 2019-11-27 [2] CRAN (R 3.6.1)
## matrixStats * 0.55.0 2019-09-07 [2] CRAN (R 3.6.1)
## memoise 1.1.0 2017-04-21 [2] CRAN (R 3.6.1)
## mgcv 1.8-31 2019-11-09 [4] CRAN (R 3.6.1)
## munsell 0.5.0 2018-06-12 [2] CRAN (R 3.6.1)
## nlme 3.1-142 2019-11-07 [4] CRAN (R 3.6.1)
## openssl 1.4.1 2019-07-18 [2] CRAN (R 3.6.1)
## pillar 1.4.2 2019-06-29 [2] CRAN (R 3.6.1)
## pkgbuild 1.0.6 2019-10-09 [2] CRAN (R 3.6.1)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 3.6.1)
## pkgload 1.0.2 2018-10-29 [2] CRAN (R 3.6.1)
## plyr 1.8.4 2016-06-08 [2] CRAN (R 3.6.1)
## png 0.1-7 2013-12-03 [2] CRAN (R 3.6.1)
## prettyunits 1.0.2 2015-07-13 [2] CRAN (R 3.6.1)
## processx 3.4.1 2019-07-18 [2] CRAN (R 3.6.1)
## progress 1.2.2 2019-05-16 [2] CRAN (R 3.6.1)
## ps 1.3.0 2018-12-21 [2] CRAN (R 3.6.1)
## purrr 0.3.3 2019-10-18 [1] CRAN (R 3.6.1)
## R.methodsS3 1.7.1 2016-02-16 [2] CRAN (R 3.6.1)
## R.oo 1.23.0 2019-11-03 [1] CRAN (R 3.6.1)
## R.utils 2.9.0 2019-06-13 [2] CRAN (R 3.6.1)
## R6 2.4.1 2019-11-12 [2] CRAN (R 3.6.1)
## rappdirs 0.3.1 2016-03-28 [2] CRAN (R 3.6.1)
## RColorBrewer 1.1-2 2014-12-07 [1] CRAN (R 3.6.1)
## Rcpp 1.0.3 2019-11-08 [2] CRAN (R 3.6.1)
## RCurl 1.95-4.12 2019-03-04 [2] CRAN (R 3.6.1)
## readr 1.3.1 2018-12-21 [2] CRAN (R 3.6.1)
## remotes 2.1.0 2019-06-24 [2] CRAN (R 3.6.1)
## rjson 0.2.20 2018-06-08 [2] CRAN (R 3.6.1)
## rlang 0.4.2 2019-11-23 [2] CRAN (R 3.6.1)
## rmarkdown 1.16 2019-10-01 [1] CRAN (R 3.6.1)
## rprojroot 1.3-2 2018-01-03 [2] CRAN (R 3.6.1)
## Rsamtools 2.2.0 2019-10-29 [1] Bioconductor
## RSQLite 2.1.2 2019-07-24 [2] CRAN (R 3.6.1)
## rtracklayer 1.46.0 2019-10-29 [1] Bioconductor
## rvest 0.3.5 2019-11-08 [2] CRAN (R 3.6.1)
## S4Vectors * 0.24.0 2019-10-29 [1] Bioconductor
## scales 1.1.0 2019-11-18 [2] CRAN (R 3.6.1)
## selectr 0.4-2 2019-11-20 [2] CRAN (R 3.6.1)
## sessioninfo 1.1.1 2018-11-05 [2] CRAN (R 3.6.1)
## shape 1.4.4 2018-02-07 [2] CRAN (R 3.6.1)
## ShortRead 1.44.0 2019-10-29 [1] Bioconductor
## stringi 1.4.3 2019-03-12 [2] CRAN (R 3.6.1)
## stringr 1.4.0 2019-02-10 [2] CRAN (R 3.6.1)
## SummarizedExperiment * 1.16.0 2019-10-29 [1] Bioconductor
## survival 3.1-7 2019-11-09 [4] CRAN (R 3.6.1)
## survminer 0.4.6 2019-09-03 [2] CRAN (R 3.6.1)
## survMisc 0.5.5 2018-07-05 [2] CRAN (R 3.6.1)
## sva 3.34.0 2019-10-29 [1] Bioconductor
## R TCGAbiolinks * 2.14.0 <NA> [2] <NA>
## testthat 2.3.1 2019-12-01 [2] CRAN (R 3.6.1)
## tibble 2.1.3 2019-06-06 [2] CRAN (R 3.6.1)
## tidyr 1.0.0 2019-09-11 [2] CRAN (R 3.6.1)
## tidyselect 0.2.5 2018-10-11 [2] CRAN (R 3.6.1)
## usethis 1.5.1 2019-07-04 [2] CRAN (R 3.6.1)
## vctrs 0.2.0 2019-07-05 [2] CRAN (R 3.6.1)
## withr 2.1.2 2018-03-15 [2] CRAN (R 3.6.1)
## xfun 0.10 2019-10-01 [1] CRAN (R 3.6.1)
## XML 3.98-1.20 2019-06-06 [2] CRAN (R 3.6.1)
## xml2 1.2.2 2019-08-09 [2] CRAN (R 3.6.1)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 3.6.1)
## XVector 0.26.0 2019-10-29 [1] Bioconductor
## yaml 2.2.0 2018-07-25 [2] CRAN (R 3.6.1)
## zeallot 0.1.0 2018-01-28 [2] CRAN (R 3.6.1)
## zlibbioc 1.32.0 2019-10-29 [1] Bioconductor
## zoo 1.8-6 2019-05-28 [2] CRAN (R 3.6.1)
##
## [1] /home/tiagochst/R/x86_64-pc-linux-gnu-library/3.6
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library
##
## R ── Package was removed from disk.