# Retrieve the table of GDC projects and display its identifying columns.
projects <- getGDCprojects()
projects[c("id", "project_id", "released", "tumor")]
##                           id                project_id released      tumor
## 1                CTSP-DLBCL1               CTSP-DLBCL1     TRUE     DLBCL1
## 2                  TCGA-BRCA                 TCGA-BRCA     TRUE       BRCA
## 3                  TCGA-LUAD                 TCGA-LUAD     TRUE       LUAD
## 4                    CPTAC-3                   CPTAC-3     TRUE          3
## 5                APOLLO-LUAD               APOLLO-LUAD     TRUE       LUAD
## 6                    MATCH-B                   MATCH-B     TRUE          B
## 7                    CMI-ASC                   CMI-ASC     TRUE        ASC
## 8                   MATCH-C1                  MATCH-C1     TRUE         C1
## 9      BEATAML1.0-CRENOLANIB     BEATAML1.0-CRENOLANIB     TRUE CRENOLANIB
## 10              CDDP_EAGLE-1              CDDP_EAGLE-1     TRUE          1
## 11                   CMI-MPC                   CMI-MPC     TRUE        MPC
## 12                  MATCH-S1                  MATCH-S1     TRUE         S1
## 13                   MATCH-W                   MATCH-W     TRUE          W
## 14                 MATCH-Z1D                 MATCH-Z1D     TRUE        Z1D
## 15                 MATCH-Z1A                 MATCH-Z1A     TRUE        Z1A
## 16                   MATCH-Y                   MATCH-Y     TRUE          Y
## 17                   MATCH-U                   MATCH-U     TRUE          U
## 18                 MATCH-Z1B                 MATCH-Z1B     TRUE        Z1B
## 19                  MATCH-S2                  MATCH-S2     TRUE         S2
## 20                     FM-AD                     FM-AD     TRUE         AD
## 21            VAREPOP-APOLLO            VAREPOP-APOLLO     TRUE     APOLLO
## 22                   MATCH-I                   MATCH-I     TRUE          I
## 23                   MATCH-P                   MATCH-P     TRUE          P
## 24                   MATCH-R                   MATCH-R     TRUE          R
## 25                   MATCH-N                   MATCH-N     TRUE          N
## 26                   MATCH-Q                   MATCH-Q     TRUE          Q
## 27                   MATCH-H                   MATCH-H     TRUE          H
## 28                   CMI-MBC                   CMI-MBC     TRUE        MBC
## 29                 MATCH-Z1I                 MATCH-Z1I     TRUE        Z1I
## 30         BEATAML1.0-COHORT         BEATAML1.0-COHORT     TRUE     COHORT
## 31                  OHSU-CNL                  OHSU-CNL     TRUE        CNL
## 32       ORGANOID-PANCREATIC       ORGANOID-PANCREATIC     TRUE PANCREATIC
## 33              NCICCR-DLBCL              NCICCR-DLBCL     TRUE      DLBCL
## 34                   CPTAC-2                   CPTAC-2     TRUE          2
## 35                  TRIO-CRU                  TRIO-CRU     TRUE        CRU
## 36             MMRF-COMMPASS             MMRF-COMMPASS     TRUE   COMMPASS
## 37                WCDT-MCRPC                WCDT-MCRPC     TRUE      MCRPC
## 38                 MP2PRT-WT                 MP2PRT-WT     TRUE         WT
## 39                MP2PRT-ALL                MP2PRT-ALL     TRUE        ALL
## 40                 REBC-THYR                 REBC-THYR     TRUE       THYR
## 41                 APOLLO-OV                 APOLLO-OV     TRUE         OV
## 42                  CCDI-MCI                  CCDI-MCI     TRUE        MCI
## 43                 TCGA-DLBC                 TCGA-DLBC     TRUE       DLBC
## 44                 TCGA-COAD                 TCGA-COAD     TRUE       COAD
## 45                 TCGA-CESC                 TCGA-CESC     TRUE       CESC
## 46                 TCGA-CHOL                 TCGA-CHOL     TRUE       CHOL
## 47                 TCGA-ESCA                 TCGA-ESCA     TRUE       ESCA
## 48                 TCGA-LIHC                 TCGA-LIHC     TRUE       LIHC
## 49                 TCGA-MESO                 TCGA-MESO     TRUE       MESO
## 50                 TCGA-KIRP                 TCGA-KIRP     TRUE       KIRP
## 51                 TCGA-LAML                 TCGA-LAML     TRUE       LAML
## 52                 TCGA-PCPG                 TCGA-PCPG     TRUE       PCPG
## 53                 TCGA-HNSC                 TCGA-HNSC     TRUE       HNSC
## 54          CGCI-HTMCP-DLBCL          CGCI-HTMCP-DLBCL     TRUE      HTMCP
## 55                 TCGA-READ                 TCGA-READ     TRUE       READ
## 56                 TCGA-PAAD                 TCGA-PAAD     TRUE       PAAD
## 57                  TCGA-UCS                  TCGA-UCS     TRUE        UCS
## 58                 TCGA-KIRC                 TCGA-KIRC     TRUE       KIRC
## 59                  TCGA-GBM                  TCGA-GBM     TRUE        GBM
## 60                 TCGA-KICH                 TCGA-KICH     TRUE       KICH
## 61 EXCEPTIONAL_RESPONDERS-ER EXCEPTIONAL_RESPONDERS-ER     TRUE         ER
## 62             CGCI-HTMCP-LC             CGCI-HTMCP-LC     TRUE      HTMCP
## 63                 TARGET-OS                 TARGET-OS     TRUE         OS
## 64             TARGET-ALL-P3             TARGET-ALL-P3     TRUE        ALL
## 65                CGCI-BLGSP                CGCI-BLGSP     TRUE      BLGSP
## 66                 TCGA-THYM                 TCGA-THYM     TRUE       THYM
## 67                  TCGA-UVM                  TCGA-UVM     TRUE        UVM
## 68                 TARGET-RT                 TARGET-RT     TRUE         RT
## 69               TARGET-CCSK               TARGET-CCSK     TRUE       CCSK
## 70                TARGET-NBL                TARGET-NBL     TRUE        NBL
## 71                 TCGA-SKCM                 TCGA-SKCM     TRUE       SKCM
## 72                 TCGA-THCA                 TCGA-THCA     TRUE       THCA
## 73                 TCGA-STAD                 TCGA-STAD     TRUE       STAD
## 74             TARGET-ALL-P2             TARGET-ALL-P2     TRUE        ALL
## 75                  TCGA-ACC                  TCGA-ACC     TRUE        ACC
## 76             TARGET-ALL-P1             TARGET-ALL-P1     TRUE        ALL
## 77                 TARGET-WT                 TARGET-WT     TRUE         WT
## 78                  TCGA-LGG                  TCGA-LGG     TRUE        LGG
## 79                 HCMI-CMDC                 HCMI-CMDC     TRUE       CMDC
## 80                 TCGA-SARC                 TCGA-SARC     TRUE       SARC
## 81             CGCI-HTMCP-CC             CGCI-HTMCP-CC     TRUE      HTMCP
## 82                   TCGA-OV                   TCGA-OV     TRUE         OV
## 83                 TCGA-BLCA                 TCGA-BLCA     TRUE       BLCA
## 84                 TCGA-UCEC                 TCGA-UCEC     TRUE       UCEC
## 85                 TCGA-PRAD                 TCGA-PRAD     TRUE       PRAD
## 86                TARGET-AML                TARGET-AML     TRUE        AML
## 87                 TCGA-TGCT                 TCGA-TGCT     TRUE       TGCT
## 88                 TCGA-LUSC                 TCGA-LUSC     TRUE       LUSC
# Report which version of TCGAbiolinks is installed.
packageVersion("TCGAbiolinks")
## [1] '2.38.0'
# Tabulate the sample types available in every TCGA project.
#
# Only projects whose id starts with "TCGA-" are queried. Each one is searched
# for "STAR - Counts" gene expression quantification files, and the
# `sample_type` column of the query results is tabulated. A project gets `NA`
# in place of a table when its query fails or when the results carry no
# `sample_type` column. The outcome, `smpls`, is a list named by project id.
tcga_projects <- projects[grepl("^TCGA-", projects$project_id), ]
id <- tcga_projects$project_id

# Preallocate the result as a list. Filling a NULL through `[[<-` yields a
# list or an atomic vector for length-one values depending on the R version
# (see ?Extract), and appending with c() copies the list on every iteration.
smpls <- vector("list", length(id))
names(smpls) <- id

# seq_along() skips the loop for an empty `id`; 1:length(id) would still run
# for i = 1 and i = 0.
for (i in seq_along(id)) {
  query_Target <- tryCatch(
    suppressMessages(
      GDCquery(
        project = id[i],
        data.category = "Transcriptome Profiling",
        data.type = "Gene Expression Quantification",
        workflow.type = "STAR - Counts"
      )
    ),
    error = function(e) {
      # Carry on with the remaining projects, but report what failed and why.
      warning(
        "GDCquery failed for ", id[i], ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )

  if (is.null(query_Target)) {
    smpls[[i]] <- NA
    next
  }

  samplesDown_Target <- getResults(query_Target)
  if ("sample_type" %in% colnames(samplesDown_Target)) {
    smpls[[i]] <- table(samplesDown_Target$sample_type)
  } else {
    # Results without a sample_type column cannot be tabulated.
    smpls[[i]] <- NA
  }
}
smpls
## $`TCGA-BRCA`
## 
##          Metastatic       Primary Tumor Solid Tissue Normal 
##                   7                1111                 113 
## 
## $`TCGA-LUAD`
## 
##       Primary Tumor     Recurrent Tumor Solid Tissue Normal 
##                 540                   2                  59 
## 
## $`TCGA-DLBC`
## 
## Primary Tumor 
##            48 
## 
## $`TCGA-COAD`
## 
##          Metastatic       Primary Tumor     Recurrent Tumor Solid Tissue Normal 
##                   1                 481                   1                  41 
## 
## $`TCGA-CESC`
## 
##          Metastatic       Primary Tumor Solid Tissue Normal 
##                   2                 304                   3 
## 
## $`TCGA-CHOL`
## 
##       Primary Tumor Solid Tissue Normal 
##                  35                   9 
## 
## $`TCGA-ESCA`
## 
##          Metastatic       Primary Tumor Solid Tissue Normal 
##                   1                 184                  13 
## 
## $`TCGA-LIHC`
## 
##       Primary Tumor     Recurrent Tumor Solid Tissue Normal 
##                 371                   3                  50 
## 
## $`TCGA-MESO`
## 
## Primary Tumor 
##            87 
## 
## $`TCGA-KIRP`
## 
## Additional - New Primary            Primary Tumor      Solid Tissue Normal 
##                        1                      290                       32 
## 
## $`TCGA-LAML`
## 
## Primary Blood Derived Cancer - Peripheral Blood 
##                                             151 
## 
## $`TCGA-PCPG`
## 
## Additional - New Primary               Metastatic            Primary Tumor 
##                        3                        2                      179 
##      Solid Tissue Normal 
##                        3 
## 
## $`TCGA-HNSC`
## 
##          Metastatic       Primary Tumor Solid Tissue Normal 
##                   2                 520                  44 
## 
## $`TCGA-READ`
## 
##       Primary Tumor     Recurrent Tumor Solid Tissue Normal 
##                 166                   1                  10 
## 
## $`TCGA-PAAD`
## 
##          Metastatic       Primary Tumor Solid Tissue Normal 
##                   1                 178                   4 
## 
## $`TCGA-UCS`
## 
## Primary Tumor 
##            57 
## 
## $`TCGA-KIRC`
## 
## Additional - New Primary            Primary Tumor      Solid Tissue Normal 
##                        1                      541                       72 
## 
## $`TCGA-GBM`
## 
##       Primary Tumor     Recurrent Tumor Solid Tissue Normal 
##                 372                  14                   5 
## 
## $`TCGA-KICH`
## 
##       Primary Tumor Solid Tissue Normal 
##                  66                  25 
## 
## $`TCGA-THYM`
## 
##       Primary Tumor Solid Tissue Normal 
##                 120                   2 
## 
## $`TCGA-UVM`
## 
## Primary Tumor 
##            80 
## 
## $`TCGA-SKCM`
## 
## Additional Metastatic            Metastatic         Primary Tumor 
##                     1                   368                   103 
##   Solid Tissue Normal 
##                     1 
## 
## $`TCGA-THCA`
## 
##          Metastatic       Primary Tumor Solid Tissue Normal 
##                   8                 505                  59 
## 
## $`TCGA-STAD`
## 
##       Primary Tumor Solid Tissue Normal 
##                 412                  36 
## 
## $`TCGA-ACC`
## 
## Primary Tumor 
##            79 
## 
## $`TCGA-LGG`
## 
##   Primary Tumor Recurrent Tumor 
##             516              18 
## 
## $`TCGA-SARC`
## 
##          Metastatic       Primary Tumor     Recurrent Tumor Solid Tissue Normal 
##                   1                 259                   3                   2 
## 
## $`TCGA-OV`
## 
##   Primary Tumor Recurrent Tumor 
##             426               8 
## 
## $`TCGA-BLCA`
## 
##       Primary Tumor Solid Tissue Normal 
##                 412                  19 
## 
## $`TCGA-UCEC`
## 
##       Primary Tumor     Recurrent Tumor Solid Tissue Normal 
##                 553                   1                  35 
## 
## $`TCGA-PRAD`
## 
##          Metastatic       Primary Tumor Solid Tissue Normal 
##                   1                 501                  52 
## 
## $`TCGA-TGCT`
## 
## Additional - New Primary            Primary Tumor 
##                        6                      150 
## 
## $`TCGA-LUSC`
## 
##       Primary Tumor Solid Tissue Normal 
##                 511                  51
# Show the sample-type counts stored for the first project in the list.
smpls[[1]]
## 
##          Metastatic       Primary Tumor Solid Tissue Normal 
##                   7                1111                 113
# Projects selected for a closer look; print their full metadata rows.
posIDs <- c("TCGA-DLBC", "TCGA-LUAD", "TCGA-COAD", "TCGA-BRCA")
as_tibble(projects[is.element(projects$id, posIDs), ])
## # A tibble: 4 × 10
##   id        primary_site dbgap_accession_number project_id disease_type name    
##   <chr>     <list>       <chr>                  <chr>      <list>       <chr>   
## 1 TCGA-BRCA <chr [1]>    <NA>                   TCGA-BRCA  <chr [9]>    Breast …
## 2 TCGA-LUAD <chr [1]>    <NA>                   TCGA-LUAD  <chr [4]>    Lung Ad…
## 3 TCGA-DLBC <chr [14]>   <NA>                   TCGA-DLBC  <chr [2]>    Lymphoi…
## 4 TCGA-COAD <chr [2]>    <NA>                   TCGA-COAD  <chr [4]>    Colon A…
## # ℹ 4 more variables: releasable <lgl>, state <chr>, released <lgl>,
## #   tumor <chr>
# Sample-type counts for the third and fourth selected project ids.
smpls[is.element(names(smpls), posIDs[c(3, 4)])]
## $`TCGA-BRCA`
## 
##          Metastatic       Primary Tumor Solid Tissue Normal 
##                   7                1111                 113 
## 
## $`TCGA-COAD`
## 
##          Metastatic       Primary Tumor     Recurrent Tumor Solid Tissue Normal 
##                   1                 481                   1                  41
# Print the GDC summary (file counts, data categories, case count, file size)
# for TCGA-BRCA. `:::` looks the function up inside the package namespace, so
# the call does not depend on the function being exported.
TCGAbiolinks:::getProjectSummary("TCGA-BRCA")
## $file_count
## [1] 70776
## 
## $data_categories
##    file_count case_count                data_category
## 1       21134       1098  Simple Nucleotide Variation
## 2        9282       1098             Sequencing Reads
## 3        5317       1098                  Biospecimen
## 4        2288       1098                     Clinical
## 5       14346       1098        Copy Number Variation
## 6        4876       1097      Transcriptome Profiling
## 7        3714       1097              DNA Methylation
## 8         919        881           Proteome Profiling
## 9        3128        927 Somatic Structural Variation
## 10       5772       1098         Structural Variation
## 
## $case_count
## [1] 1098
## 
## $file_size
## [1] 6.249966e+14
#### Query the TCGA-BRCA STAR - Counts gene expression files ####
TargetSamples <- GDCquery(
  project = "TCGA-BRCA",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "STAR - Counts"
)
## --------------------------------------
## o GDCquery: Searching in GDC database
## --------------------------------------
## Genome of reference: hg38
## --------------------------------------------
## oo Accessing GDC. This might take a while...
## --------------------------------------------
## ooo Project: TCGA-BRCA
## --------------------
## oo Filtering results
## --------------------
## ooo By data.type
## ooo By workflow.type
## ----------------
## oo Checking data
## ----------------
## ooo Checking if there are duplicated cases
## ooo Checking if there are results for the query
## -------------------
## o Preparing output
## -------------------
#### Extract the result table of the query and preview its first rows ####
CaseInfo <- getResults(TargetSamples)
as_tibble(head(CaseInfo, n = 6L))
## # A tibble: 6 × 30
##   id         data_format cases access file_name submitter_id data_category type 
##   <chr>      <chr>       <chr> <chr>  <chr>     <chr>        <chr>         <chr>
## 1 9dc09c86-… TSV         TCGA… open   d1f1743c… da44e611-ff… Transcriptom… gene…
## 2 95668f0b-… TSV         TCGA… open   6365a756… 06cd79cd-aa… Transcriptom… gene…
## 3 461fda5d-… TSV         TCGA… open   30285113… dbf87563-f6… Transcriptom… gene…
## 4 30ff778c-… TSV         TCGA… open   5167da8c… 7c7193d8-75… Transcriptom… gene…
## 5 427a04c9-… TSV         TCGA… open   fead73ce… 9248dd2f-37… Transcriptom… gene…
## 6 0682b5b9-… TSV         TCGA… open   d5066dc8… f123e0b6-4c… Transcriptom… gene…
## # ℹ 22 more variables: platform <chr>, file_size <int>, created_datetime <chr>,
## #   md5sum <chr>, updated_datetime <chr>, file_id <chr>, data_type <chr>,
## #   state <chr>, experimental_strategy <chr>, version <chr>,
## #   data_release <chr>, project <chr>, analysis_id <chr>, analysis_state <chr>,
## #   analysis_submitter_id <chr>, analysis_workflow_link <chr>,
## #   analysis_workflow_type <chr>, analysis_workflow_version <chr>,
## #   sample_type <chr>, is_ffpe <lgl>, cases.submitter_id <chr>, …
#### Subset samples so that there is an equal number of cancer and control samples ####
# Barcodes of the primary tumor (TP) and normal tissue (NT) samples.
dataPrimary_Target <- TCGAquery_SampleTypes(
  barcode = CaseInfo$cases,
  typesample = "TP"
)
dataNormal_Target <- TCGAquery_SampleTypes(
  barcode = CaseInfo$cases,
  typesample = "NT"
)

# Keep as many samples per group as the smaller group provides. A fixed
# `[1:113]` pads the result with NA whenever a group holds fewer than 113
# barcodes, and only balances the groups when the smaller one has exactly
# that many.
n_per_group <- min(length(dataPrimary_Target), length(dataNormal_Target))
dataPrimary_Target <- dataPrimary_Target[seq_len(n_per_group)]
dataNormal_Target <- dataNormal_Target[seq_len(n_per_group)]
#### Repeat the query, restricted to the selected tumor and normal barcodes ####
TargetSamples <- GDCquery(
  project = "TCGA-BRCA",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "STAR - Counts",
  barcode = c(dataPrimary_Target, dataNormal_Target)
)
## --------------------------------------
## o GDCquery: Searching in GDC database
## --------------------------------------
## Genome of reference: hg38
## --------------------------------------------
## oo Accessing GDC. This might take a while...
## --------------------------------------------
## ooo Project: TCGA-BRCA
## --------------------
## oo Filtering results
## --------------------
## ooo By data.type
## ooo By workflow.type
## ooo By barcode
## ----------------
## oo Checking data
## ----------------
## ooo Checking if there are duplicated cases
## ooo Checking if there are results for the query
## -------------------
## o Preparing output
## -------------------
#### Download the data. Note: depending on your computer, you may not have
#### enough RAM to process this amount of data; if so, reduce the selection,
#### e.g. to include 50 cancer and 50 normal tissue samples.
GDCdownload(TargetSamples) # will download 226 files and about 960 MB of data
## Downloading data for project TCGA-BRCA
## Of the 226 files for download 226 already exist.
## All samples have been already downloaded
# Run GDCprepare() on the queried files and keep the result in `data`.
# NOTE(review): `data` shares its name with utils::data(); a more specific
# name would be clearer, but later code may rely on this one.
data <- GDCprepare(TargetSamples)
## |                                                    |  0%                      |                                                    |0.4424779% ~16 s remaining|                                                    |0.8849558% ~11 s remaining|                                                    |1.327434% ~9 s remaining  |                                                    |1.769912% ~8 s remaining  |=                                                   |2.212389% ~8 s remaining  |=                                                   |2.654867% ~7 s remaining  |=                                                   |3.097345% ~7 s remaining  |=                                                   |3.539823% ~7 s remaining  |==                                                  |3.982301% ~7 s remaining  |==                                                  |4.424779% ~7 s remaining  |==                                                  |4.867257% ~6 s remaining  |==                                                  |5.309735% ~6 s remaining  |==                                                  |5.752212% ~6 s remaining  |===                                                 |6.19469% ~6 s remaining   |===                                                 |6.637168% ~6 s remaining  |===                                                 |7.079646% ~6 s remaining  |===                                                 |7.522124% ~6 s remaining  |====                                                |7.964602% ~6 s remaining  |====                                                |8.40708% ~6 s remaining   |====                                                |8.849558% ~6 s remaining  |====                                                |9.292035% ~6 s remaining  |=====                                               |9.734513% ~11 s remaining |=====                                               |10.17699% ~11 s remaining |=====                                               |10.61947% ~11 s 
remaining |=====                                               |11.06195% ~11 s remaining |=====                                               |11.50442% ~10 s remaining |======                                              |11.9469% ~10 s remaining  |======                                              |12.38938% ~10 s remaining |======                                              |12.83186% ~10 s remaining |======                                              |13.27434% ~9 s remaining  |=======                                             |13.71681% ~9 s remaining  |=======                                             |14.15929% ~9 s remaining  |=======                                             |14.60177% ~9 s remaining  |=======                                             |15.04425% ~9 s remaining  |========                                            |15.48673% ~9 s remaining  |========                                            |15.9292% ~9 s remaining   |========                                            |16.37168% ~11 s remaining |========                                            |16.81416% ~11 s remaining |========                                            |17.25664% ~11 s remaining |=========                                           |17.69912% ~11 s remaining |=========                                           |18.14159% ~10 s remaining |=========                                           |18.58407% ~10 s remaining |=========                                           |19.02655% ~10 s remaining |==========                                          |19.46903% ~10 s remaining |==========                                          |19.9115% ~10 s remaining  |==========                                          |20.35398% ~10 s remaining |==========                                          |20.79646% ~10 s remaining |===========                                         |21.23894% ~9 s remaining  |===========                                         |21.68142% ~9 s 
remaining  |===========                                         |22.12389% ~9 s remaining  |===========                                         |22.56637% ~9 s remaining  |===========                                         |23.00885% ~9 s remaining  |============                                        |23.45133% ~9 s remaining  |============                                        |23.89381% ~9 s remaining  |============                                        |24.33628% ~10 s remaining |============                                        |24.77876% ~10 s remaining |=============                                       |25.22124% ~10 s remaining |=============                                       |25.66372% ~10 s remaining |=============                                       |26.10619% ~10 s remaining |=============                                       |26.54867% ~10 s remaining |==============                                      |26.99115% ~9 s remaining  |==============                                      |27.43363% ~9 s remaining  |==============                                      |27.87611% ~9 s remaining  |==============                                      |28.31858% ~9 s remaining  |==============                                      |28.76106% ~9 s remaining  |===============                                     |29.20354% ~9 s remaining  |===============                                     |29.64602% ~9 s remaining  |===============                                     |30.0885% ~9 s remaining   |===============                                     |30.53097% ~8 s remaining  |================                                    |30.97345% ~8 s remaining  |================                                    |31.41593% ~8 s remaining  |================                                    |31.85841% ~8 s remaining  |================                                    |32.30088% ~8 s remaining  |=================                                   |32.74336% ~8 s 
remaining  |=================                                   |33.18584% ~8 s remaining  |=================                                   |33.62832% ~9 s remaining  |=================                                   |34.0708% ~9 s remaining   |=================                                   |34.51327% ~9 s remaining  |==================                                  |34.95575% ~9 s remaining  |==================                                  |35.39823% ~8 s remaining  |==================                                  |35.84071% ~8 s remaining  |==================                                  |36.28319% ~8 s remaining  |===================                                 |36.72566% ~8 s remaining  |===================                                 |37.16814% ~8 s remaining  |===================                                 |37.61062% ~8 s remaining  |===================                                 |38.0531% ~8 s remaining   |====================                                |38.49558% ~8 s remaining  |====================                                |38.93805% ~8 s remaining  |====================                                |39.38053% ~7 s remaining  |====================                                |39.82301% ~7 s remaining  |====================                                |40.26549% ~7 s remaining  |=====================                               |40.70796% ~7 s remaining  |=====================                               |41.15044% ~7 s remaining  |=====================                               |41.59292% ~7 s remaining  |=====================                               |42.0354% ~7 s remaining   |======================                              |42.47788% ~7 s remaining  |======================                              |42.92035% ~7 s remaining  |======================                              |43.36283% ~7 s remaining  |======================                              |43.80531% ~7 s 
remaining  |=======================                             |44.24779% ~7 s remaining  |=======================                             |44.69027% ~6 s remaining  |=======================                             |45.13274% ~7 s remaining  |=======================                             |45.57522% ~7 s remaining  |=======================                             |46.0177% ~7 s remaining   |========================                            |46.46018% ~7 s remaining  |========================                            |46.90265% ~7 s remaining  |========================                            |47.34513% ~7 s remaining  |========================                            |47.78761% ~7 s remaining  |=========================                           |48.23009% ~6 s remaining  |=========================                           |48.67257% ~6 s remaining  |=========================                           |49.11504% ~6 s remaining  |=========================                           |49.55752% ~6 s remaining  |==========================                          | 50% ~6 s remaining       |==========================                          |50.44248% ~6 s remaining  |==========================                          |50.88496% ~6 s remaining  |==========================                          |51.32743% ~6 s remaining  |==========================                          |51.76991% ~6 s remaining  |===========================                         |52.21239% ~6 s remaining  |===========================                         |52.65487% ~6 s remaining  |===========================                         |53.09735% ~6 s remaining  |===========================                         |53.53982% ~6 s remaining  |============================                        |53.9823% ~5 s remaining   |============================                        |54.42478% ~5 s remaining  |============================                        |54.86726% ~5 s 
remaining  |============================                        |55.30973% ~5 s remaining  |============================                        |55.75221% ~5 s remaining  |=============================                       |56.19469% ~5 s remaining  |=============================                       |56.63717% ~5 s remaining  |=============================                       |57.07965% ~5 s remaining  |=============================                       |57.52212% ~5 s remaining  |==============================                      |57.9646% ~5 s remaining   |==============================                      |58.40708% ~5 s remaining  |==============================                      |58.84956% ~5 s remaining  |==============================                      |59.29204% ~5 s remaining  |===============================                     |59.73451% ~5 s remaining  |===============================                     |60.17699% ~5 s remaining  |===============================                     |60.61947% ~5 s remaining  |===============================                     |61.06195% ~5 s remaining  |===============================                     |61.50442% ~5 s remaining  |================================                    |61.9469% ~5 s remaining   |================================                    |62.38938% ~4 s remaining  |================================                    |62.83186% ~4 s remaining  |================================                    |63.27434% ~4 s remaining  |=================================                   |63.71681% ~4 s remaining  |=================================                   |64.15929% ~4 s remaining  |=================================                   |64.60177% ~4 s remaining  |=================================                   |65.04425% ~4 s remaining  |==================================                  |65.48673% ~4 s remaining  |==================================                  |65.9292% ~4 s 
remaining   |==================================                  |66.37168% ~4 s remaining  |==================================                  |66.81416% ~4 s remaining  |==================================                  |67.25664% ~4 s remaining  |===================================                 |67.69912% ~4 s remaining  |===================================                 |68.14159% ~4 s remaining  |===================================                 |68.58407% ~4 s remaining  |===================================                 |69.02655% ~4 s remaining  |====================================                |69.46903% ~3 s remaining  |====================================                |69.9115% ~3 s remaining   |====================================                |70.35398% ~3 s remaining  |====================================                |70.79646% ~3 s remaining  |=====================================               |71.23894% ~3 s remaining  |=====================================               |71.68142% ~3 s remaining  |=====================================               |72.12389% ~3 s remaining  |=====================================               |72.56637% ~3 s remaining  |=====================================               |73.00885% ~3 s remaining  |======================================              |73.45133% ~3 s remaining  |======================================              |73.89381% ~3 s remaining  |======================================              |74.33628% ~3 s remaining  |======================================              |74.77876% ~3 s remaining  |=======================================             |75.22124% ~3 s remaining  |=======================================             |75.66372% ~3 s remaining  |=======================================             |76.10619% ~3 s remaining  |=======================================             |76.54867% ~3 s remaining  |========================================            |76.99115% ~3 
s remaining  |========================================            |77.43363% ~3 s remaining  |========================================            |77.87611% ~3 s remaining  |========================================            |78.31858% ~2 s remaining  |========================================            |78.76106% ~2 s remaining  |=========================================           |79.20354% ~2 s remaining  |=========================================           |79.64602% ~2 s remaining  |=========================================           |80.0885% ~2 s remaining   |=========================================           |80.53097% ~2 s remaining  |==========================================          |80.97345% ~2 s remaining  |==========================================          |81.41593% ~2 s remaining  |==========================================          |81.85841% ~2 s remaining  |==========================================          |82.30088% ~2 s remaining  |===========================================         |82.74336% ~2 s remaining  |===========================================         |83.18584% ~2 s remaining  |===========================================         |83.62832% ~2 s remaining  |===========================================         |84.0708% ~2 s remaining   |===========================================         |84.51327% ~2 s remaining  |============================================        |84.95575% ~2 s remaining  |============================================        |85.39823% ~2 s remaining  |============================================        |85.84071% ~2 s remaining  |============================================        |86.28319% ~2 s remaining  |=============================================       |86.72566% ~1 s remaining  |=============================================       |87.16814% ~1 s remaining  |=============================================       |87.61062% ~1 s remaining  |=============================================       |88.0531% ~1 
s remaining   |==============================================      |88.49558% ~1 s remaining  |==============================================      |88.93805% ~1 s remaining  |==============================================      |89.38053% ~1 s remaining  |==============================================      |89.82301% ~1 s remaining  |==============================================      |90.26549% ~1 s remaining  |===============================================     |90.70796% ~1 s remaining  |===============================================     |91.15044% ~1 s remaining  |===============================================     |91.59292% ~1 s remaining  |===============================================     |92.0354% ~1 s remaining   |================================================    |92.47788% ~1 s remaining  |================================================    |92.92035% ~1 s remaining  |================================================    |93.36283% ~1 s remaining  |================================================    |93.80531% ~1 s remaining  |=================================================   |94.24779% ~1 s remaining  |=================================================   |94.69027% ~1 s remaining  |=================================================   |95.13274% ~1 s remaining  |=================================================   |95.57522% ~0 s remaining  |=================================================   |96.0177% ~0 s remaining   |==================================================  |96.46018% ~0 s remaining  |==================================================  |96.90265% ~0 s remaining  |==================================================  |97.34513% ~0 s remaining  |==================================================  |97.78761% ~0 s remaining  |=================================================== |98.23009% ~0 s remaining  |=================================================== |98.67257% ~0 s remaining  |=================================================== |99.11504% 
~0 s remaining  |=================================================== |99.55752% ~0 s remaining  |====================================================|100% ~0 s remaining       |====================================================|100%                      Completed after 11 s
## Starting to add information to samples
##  => Add clinical information to samples
##  => Adding TCGA molecular information from marker papers
##  => Information will have prefix 'paper_'
## brca subtype information from:doi.org/10.1016/j.ccell.2018.03.014
## Available assays in SummarizedExperiment : 
##   => unstranded
##   => stranded_first
##   => stranded_second
##   => tpm_unstrand
##   => fpkm_unstrand
##   => fpkm_uq_unstrand
# List the assays stored in the SummarizedExperiment `data`.
assays(data)
## List of length 6
## names(6): unstranded stranded_first ... fpkm_unstrand fpkm_uq_unstrand
# Show the column-level (sample) metadata of `data` as a tibble.
as_tibble(colData(data))
## # A tibble: 226 × 93
##    barcode         patient sample shortLetterCode definition sample_submitter_id
##    <chr>           <chr>   <chr>  <chr>           <chr>      <chr>              
##  1 TCGA-EW-A2FS-0… TCGA-E… TCGA-… TP              Primary s… TCGA-EW-A2FS-01A   
##  2 TCGA-OL-A6VR-0… TCGA-O… TCGA-… TP              Primary s… TCGA-OL-A6VR-01A   
##  3 TCGA-E9-A226-0… TCGA-E… TCGA-… TP              Primary s… TCGA-E9-A226-01A   
##  4 TCGA-A8-A08H-0… TCGA-A… TCGA-… TP              Primary s… TCGA-A8-A08H-01A   
##  5 TCGA-D8-A27H-0… TCGA-D… TCGA-… TP              Primary s… TCGA-D8-A27H-01A   
##  6 TCGA-D8-A3Z6-0… TCGA-D… TCGA-… TP              Primary s… TCGA-D8-A3Z6-01A   
##  7 TCGA-B6-A1KN-0… TCGA-B… TCGA-… TP              Primary s… TCGA-B6-A1KN-01A   
##  8 TCGA-BH-A0DL-0… TCGA-B… TCGA-… TP              Primary s… TCGA-BH-A0DL-01A   
##  9 TCGA-A8-A09X-0… TCGA-A… TCGA-… TP              Primary s… TCGA-A8-A09X-01A   
## 10 TCGA-BH-A2L8-0… TCGA-B… TCGA-… TP              Primary s… TCGA-BH-A2L8-01A   
## # ℹ 216 more rows
## # ℹ 87 more variables: tumor_descriptor <chr>, sample_id <chr>,
## #   pathology_report_uuid <chr>, submitter_id <chr>, sample_type <chr>,
## #   specimen_type <chr>, days_to_collection <int>, state <chr>,
## #   initial_weight <dbl>, tissue_type <chr>, preservation_method <chr>,
## #   synchronous_malignancy <chr>, ajcc_pathologic_stage <chr>,
## #   days_to_diagnosis <int>, laterality <chr>, treatments <list>, …
# Show the row-level (feature) annotation of `data` as a tibble.
as_tibble(rowData(data))
## # A tibble: 60,660 × 10
##    source type  score phase gene_id            gene_type gene_name level hgnc_id
##    <fct>  <fct> <dbl> <int> <chr>              <chr>     <chr>     <chr> <chr>  
##  1 HAVANA gene     NA    NA ENSG00000000003.15 protein_… TSPAN6    2     HGNC:1…
##  2 HAVANA gene     NA    NA ENSG00000000005.6  protein_… TNMD      2     HGNC:1…
##  3 HAVANA gene     NA    NA ENSG00000000419.13 protein_… DPM1      2     HGNC:3…
##  4 HAVANA gene     NA    NA ENSG00000000457.14 protein_… SCYL3     2     HGNC:1…
##  5 HAVANA gene     NA    NA ENSG00000000460.17 protein_… C1orf112  2     HGNC:2…
##  6 HAVANA gene     NA    NA ENSG00000000938.13 protein_… FGR       2     HGNC:3…
##  7 HAVANA gene     NA    NA ENSG00000000971.16 protein_… CFH       1     HGNC:4…
##  8 HAVANA gene     NA    NA ENSG00000001036.14 protein_… FUCA2     2     HGNC:4…
##  9 HAVANA gene     NA    NA ENSG00000001084.13 protein_… GCLC      1     HGNC:4…
## 10 HAVANA gene     NA    NA ENSG00000001167.14 protein_… NFYA      2     HGNC:7…
## # ℹ 60,650 more rows
## # ℹ 1 more variable: havana_gene <chr>
# Count how many features fall into each `gene_type` category.
table(rowData(data)$gene_type)
## 
##                          IG_C_gene                    IG_C_pseudogene 
##                                 14                                  9 
##                          IG_D_gene                          IG_J_gene 
##                                 37                                 18 
##                    IG_J_pseudogene                      IG_pseudogene 
##                                  3                                  1 
##                          IG_V_gene                    IG_V_pseudogene 
##                                145                                187 
##                             lncRNA                              miRNA 
##                              16901                               1881 
##                           misc_RNA                            Mt_rRNA 
##                               2212                                  2 
##                            Mt_tRNA             polymorphic_pseudogene 
##                                 22                                 48 
##               processed_pseudogene                     protein_coding 
##                              10167                              19962 
##                         pseudogene                           ribozyme 
##                                 18                                  8 
##                               rRNA                    rRNA_pseudogene 
##                                 47                                497 
##                             scaRNA                              scRNA 
##                                 49                                  1 
##                             snoRNA                              snRNA 
##                                943                               1901 
##                               sRNA                                TEC 
##                                  5                               1057 
##                          TR_C_gene                          TR_D_gene 
##                                  6                                  4 
##                          TR_J_gene                    TR_J_pseudogene 
##                                 79                                  4 
##                          TR_V_gene                    TR_V_pseudogene 
##                                106                                 33 
##   transcribed_processed_pseudogene     transcribed_unitary_pseudogene 
##                                500                                138 
## transcribed_unprocessed_pseudogene    translated_processed_pseudogene 
##                                939                                  2 
##  translated_unprocessed_pseudogene                 unitary_pseudogene 
##                                  1                                 98 
##             unprocessed_pseudogene                          vault_RNA 
##                               2614                                  1
# Restrict the SummarizedExperiment to the rows annotated as protein-coding.
SECoding <- data[
  rowData(data)$gene_type == "protein_coding",
]

# Extract the "fpkm_unstrand" assay of the protein-coding subset as the
# expression table used by the following steps. `cor.cut` is the correlation
# cutoff handed to the preprocessing step.
dataPrep_Coding <- TCGAanalyze_Preprocessing(
  object = SECoding,
  datatype = "fpkm_unstrand",
  cor.cut = 0.6
)
## Number of outliers: 0
# Distribution of the preprocessed values, one box per column, with outlier
# points hidden.
boxplot(dataPrep_Coding, outline = FALSE)

# Normalize the preprocessed table with the "geneLength" method, using the
# `geneInfoHT` annotation table.
dataNorm_Coding <- TCGAanalyze_Normalization(
  tabDF = dataPrep_Coding,
  method = "geneLength",
  geneInfo = geneInfoHT
)
## I Need about  55 seconds for this Complete Normalization Upper Quantile  [Processing 80k elements /s]
## Step 1 of 4: newSeqExpressionSet ...
## Step 2 of 4: withinLaneNormalization ...
## Step 3 of 4: betweenLaneNormalization ...
## Step 4 of 4: exprs ...
# Quantile-based filtering with a 0.25 cutoff.
# NOTE(review): the input here is `dataPrep_Coding`, not `dataNorm_Coding`,
# so in this part of the script the normalized table is only plotted and is
# never passed on to the differential expression step -- confirm that
# bypassing the normalization is intentional.
dataFilt_Coding <- TCGAanalyze_Filtering(
  tabDF = dataPrep_Coding,
  qnt.cut = 0.25,
  method = "quantile"
)

# Distribution of the normalized values, one box per column, with outlier
# points hidden.
boxplot(dataNorm_Coding, outline = FALSE)

# Differential expression analysis on the filtered table with the limma
# pipeline. `mat1` holds the columns selected by `dataNormal_Target`
# (labelled "Normal"), `mat2` the columns selected by `dataPrimary_Target`
# (labelled "Tumor"); the cutoffs are fdr.cut = 0.01 and logFC.cut = 1.
#
# `method` is intentionally not set: TCGAanalyze_DEA documents it as the
# selector of the edgeR test ("glmLRT" / "exactTest"), so it does not apply
# when `pipeline = "limma"`.
#
# NOTE(review): the recorded logFC values (e.g. around -25) look like
# differences on an untransformed scale; confirm whether the input should be
# log-transformed (see the `log.trans` argument) before running limma.
DEGsCoding <- TCGAanalyze_DEA(
  mat1 = dataFilt_Coding[, dataNormal_Target],
  mat2 = dataFilt_Coding[, dataPrimary_Target],
  pipeline = "limma",
  Cond1type = "Normal",
  Cond2type = "Tumor",
  fdr.cut = 0.01,
  logFC.cut = 1,
  ClinicalDF = data.frame()
)
## Batch correction skipped since no factors provided
## ----------------------- DEA -------------------------------
## o 113 samples in Cond1type Normal
## o 113 samples in Cond2type Tumor
## o 14971 features as miRNA or genes
## This may take some minutes...

## ----------------------- END DEA -------------------------------
head(DEGsCoding)
##                         logFC   AveExpr         t      P.Value    adj.P.Val
## ENSG00000187824.9   -3.617888  2.806241 -29.66605 1.111120e-79 1.663458e-75
## ENSG00000136158.12 -17.360418 13.400130 -25.30245 1.021027e-67 7.642895e-64
## ENSG00000148053.17 -19.251799 12.448263 -24.58882 1.159118e-65 5.784384e-62
## ENSG00000132561.14 -25.387601 18.288332 -23.61005 8.466446e-63 3.168779e-59
## ENSG00000177098.9   -7.558010  5.750185 -23.38276 3.981548e-62 1.192155e-58
## ENSG00000154065.17  -3.069429  2.404372 -23.02268 4.686785e-61 1.169431e-57
##                           B
## ENSG00000187824.9  170.8503
## ENSG00000136158.12 143.6198
## ENSG00000148053.17 138.9340
## ENSG00000132561.14 132.4016
## ENSG00000177098.9  130.8673
## ENSG00000154065.17 128.4233