projects <- getGDCprojects()
projects[,c("id", "project_id", "released", "tumor")]
## id project_id released tumor
## 1 CTSP-DLBCL1 CTSP-DLBCL1 TRUE DLBCL1
## 2 TCGA-BRCA TCGA-BRCA TRUE BRCA
## 3 TCGA-LUAD TCGA-LUAD TRUE LUAD
## 4 CPTAC-3 CPTAC-3 TRUE 3
## 5 APOLLO-LUAD APOLLO-LUAD TRUE LUAD
## 6 MATCH-B MATCH-B TRUE B
## 7 CMI-ASC CMI-ASC TRUE ASC
## 8 MATCH-C1 MATCH-C1 TRUE C1
## 9 BEATAML1.0-CRENOLANIB BEATAML1.0-CRENOLANIB TRUE CRENOLANIB
## 10 CDDP_EAGLE-1 CDDP_EAGLE-1 TRUE 1
## 11 CMI-MPC CMI-MPC TRUE MPC
## 12 MATCH-S1 MATCH-S1 TRUE S1
## 13 MATCH-W MATCH-W TRUE W
## 14 MATCH-Z1D MATCH-Z1D TRUE Z1D
## 15 MATCH-Z1A MATCH-Z1A TRUE Z1A
## 16 MATCH-Y MATCH-Y TRUE Y
## 17 MATCH-U MATCH-U TRUE U
## 18 MATCH-Z1B MATCH-Z1B TRUE Z1B
## 19 MATCH-S2 MATCH-S2 TRUE S2
## 20 FM-AD FM-AD TRUE AD
## 21 VAREPOP-APOLLO VAREPOP-APOLLO TRUE APOLLO
## 22 MATCH-I MATCH-I TRUE I
## 23 MATCH-P MATCH-P TRUE P
## 24 MATCH-R MATCH-R TRUE R
## 25 MATCH-N MATCH-N TRUE N
## 26 MATCH-Q MATCH-Q TRUE Q
## 27 MATCH-H MATCH-H TRUE H
## 28 CMI-MBC CMI-MBC TRUE MBC
## 29 MATCH-Z1I MATCH-Z1I TRUE Z1I
## 30 BEATAML1.0-COHORT BEATAML1.0-COHORT TRUE COHORT
## 31 OHSU-CNL OHSU-CNL TRUE CNL
## 32 ORGANOID-PANCREATIC ORGANOID-PANCREATIC TRUE PANCREATIC
## 33 NCICCR-DLBCL NCICCR-DLBCL TRUE DLBCL
## 34 CPTAC-2 CPTAC-2 TRUE 2
## 35 TRIO-CRU TRIO-CRU TRUE CRU
## 36 MMRF-COMMPASS MMRF-COMMPASS TRUE COMMPASS
## 37 WCDT-MCRPC WCDT-MCRPC TRUE MCRPC
## 38 MP2PRT-WT MP2PRT-WT TRUE WT
## 39 MP2PRT-ALL MP2PRT-ALL TRUE ALL
## 40 REBC-THYR REBC-THYR TRUE THYR
## 41 APOLLO-OV APOLLO-OV TRUE OV
## 42 CCDI-MCI CCDI-MCI TRUE MCI
## 43 TCGA-DLBC TCGA-DLBC TRUE DLBC
## 44 TCGA-COAD TCGA-COAD TRUE COAD
## 45 TCGA-CESC TCGA-CESC TRUE CESC
## 46 TCGA-CHOL TCGA-CHOL TRUE CHOL
## 47 TCGA-ESCA TCGA-ESCA TRUE ESCA
## 48 TCGA-LIHC TCGA-LIHC TRUE LIHC
## 49 TCGA-MESO TCGA-MESO TRUE MESO
## 50 TCGA-KIRP TCGA-KIRP TRUE KIRP
## 51 TCGA-LAML TCGA-LAML TRUE LAML
## 52 TCGA-PCPG TCGA-PCPG TRUE PCPG
## 53 TCGA-HNSC TCGA-HNSC TRUE HNSC
## 54 CGCI-HTMCP-DLBCL CGCI-HTMCP-DLBCL TRUE HTMCP
## 55 TCGA-READ TCGA-READ TRUE READ
## 56 TCGA-PAAD TCGA-PAAD TRUE PAAD
## 57 TCGA-UCS TCGA-UCS TRUE UCS
## 58 TCGA-KIRC TCGA-KIRC TRUE KIRC
## 59 TCGA-GBM TCGA-GBM TRUE GBM
## 60 TCGA-KICH TCGA-KICH TRUE KICH
## 61 EXCEPTIONAL_RESPONDERS-ER EXCEPTIONAL_RESPONDERS-ER TRUE ER
## 62 CGCI-HTMCP-LC CGCI-HTMCP-LC TRUE HTMCP
## 63 TARGET-OS TARGET-OS TRUE OS
## 64 TARGET-ALL-P3 TARGET-ALL-P3 TRUE ALL
## 65 CGCI-BLGSP CGCI-BLGSP TRUE BLGSP
## 66 TCGA-THYM TCGA-THYM TRUE THYM
## 67 TCGA-UVM TCGA-UVM TRUE UVM
## 68 TARGET-RT TARGET-RT TRUE RT
## 69 TARGET-CCSK TARGET-CCSK TRUE CCSK
## 70 TARGET-NBL TARGET-NBL TRUE NBL
## 71 TCGA-SKCM TCGA-SKCM TRUE SKCM
## 72 TCGA-THCA TCGA-THCA TRUE THCA
## 73 TCGA-STAD TCGA-STAD TRUE STAD
## 74 TARGET-ALL-P2 TARGET-ALL-P2 TRUE ALL
## 75 TCGA-ACC TCGA-ACC TRUE ACC
## 76 TARGET-ALL-P1 TARGET-ALL-P1 TRUE ALL
## 77 TARGET-WT TARGET-WT TRUE WT
## 78 TCGA-LGG TCGA-LGG TRUE LGG
## 79 HCMI-CMDC HCMI-CMDC TRUE CMDC
## 80 TCGA-SARC TCGA-SARC TRUE SARC
## 81 CGCI-HTMCP-CC CGCI-HTMCP-CC TRUE HTMCP
## 82 TCGA-OV TCGA-OV TRUE OV
## 83 TCGA-BLCA TCGA-BLCA TRUE BLCA
## 84 TCGA-UCEC TCGA-UCEC TRUE UCEC
## 85 TCGA-PRAD TCGA-PRAD TRUE PRAD
## 86 TARGET-AML TARGET-AML TRUE AML
## 87 TCGA-TGCT TCGA-TGCT TRUE TGCT
## 88 TCGA-LUSC TCGA-LUSC TRUE LUSC
packageVersion("TCGAbiolinks")
## [1] '2.38.0'
# added 4 lines of code to original chunk
# fixed first two lines in order to filter TCGA projects only
tcga_projects <- projects[grepl("^TCGA-", projects$project_id), ]
id <- tcga_projects$project_id
smpls <- list()
for(i in 1:length(id)){
temp <- NULL
query_Target <- NULL # line added for silent error handling
query_Target <- tryCatch(
suppressMessages(
GDCquery(
project = id[i],
data.category = "Transcriptome Profiling",
data.type = "Gene Expression Quantification",
workflow.type = "STAR - Counts"
)
),
error = function(e) NULL
)
if(!is.null(query_Target)){
samplesDown_Target <- getResults(query_Target)
if("sample_type" %in% colnames(samplesDown_Target)) { # line added to check for missing sample_type
temp[[1]] <- table(samplesDown_Target$sample_type)
} else {
temp[[1]] <- NA
}
names(temp) <- id[i]
} else {
temp[[1]] <- NA
names(temp) <- id[i]
}
smpls <- c(smpls, temp)
}
smpls
## $`TCGA-BRCA`
##
## Metastatic Primary Tumor Solid Tissue Normal
## 7 1111 113
##
## $`TCGA-LUAD`
##
## Primary Tumor Recurrent Tumor Solid Tissue Normal
## 540 2 59
##
## $`TCGA-DLBC`
##
## Primary Tumor
## 48
##
## $`TCGA-COAD`
##
## Metastatic Primary Tumor Recurrent Tumor Solid Tissue Normal
## 1 481 1 41
##
## $`TCGA-CESC`
##
## Metastatic Primary Tumor Solid Tissue Normal
## 2 304 3
##
## $`TCGA-CHOL`
##
## Primary Tumor Solid Tissue Normal
## 35 9
##
## $`TCGA-ESCA`
##
## Metastatic Primary Tumor Solid Tissue Normal
## 1 184 13
##
## $`TCGA-LIHC`
##
## Primary Tumor Recurrent Tumor Solid Tissue Normal
## 371 3 50
##
## $`TCGA-MESO`
##
## Primary Tumor
## 87
##
## $`TCGA-KIRP`
##
## Additional - New Primary Primary Tumor Solid Tissue Normal
## 1 290 32
##
## $`TCGA-LAML`
##
## Primary Blood Derived Cancer - Peripheral Blood
## 151
##
## $`TCGA-PCPG`
##
## Additional - New Primary Metastatic Primary Tumor
## 3 2 179
## Solid Tissue Normal
## 3
##
## $`TCGA-HNSC`
##
## Metastatic Primary Tumor Solid Tissue Normal
## 2 520 44
##
## $`TCGA-READ`
##
## Primary Tumor Recurrent Tumor Solid Tissue Normal
## 166 1 10
##
## $`TCGA-PAAD`
##
## Metastatic Primary Tumor Solid Tissue Normal
## 1 178 4
##
## $`TCGA-UCS`
##
## Primary Tumor
## 57
##
## $`TCGA-KIRC`
##
## Additional - New Primary Primary Tumor Solid Tissue Normal
## 1 541 72
##
## $`TCGA-GBM`
##
## Primary Tumor Recurrent Tumor Solid Tissue Normal
## 372 14 5
##
## $`TCGA-KICH`
##
## Primary Tumor Solid Tissue Normal
## 66 25
##
## $`TCGA-THYM`
##
## Primary Tumor Solid Tissue Normal
## 120 2
##
## $`TCGA-UVM`
##
## Primary Tumor
## 80
##
## $`TCGA-SKCM`
##
## Additional Metastatic Metastatic Primary Tumor
## 1 368 103
## Solid Tissue Normal
## 1
##
## $`TCGA-THCA`
##
## Metastatic Primary Tumor Solid Tissue Normal
## 8 505 59
##
## $`TCGA-STAD`
##
## Primary Tumor Solid Tissue Normal
## 412 36
##
## $`TCGA-ACC`
##
## Primary Tumor
## 79
##
## $`TCGA-LGG`
##
## Primary Tumor Recurrent Tumor
## 516 18
##
## $`TCGA-SARC`
##
## Metastatic Primary Tumor Recurrent Tumor Solid Tissue Normal
## 1 259 3 2
##
## $`TCGA-OV`
##
## Primary Tumor Recurrent Tumor
## 426 8
##
## $`TCGA-BLCA`
##
## Primary Tumor Solid Tissue Normal
## 412 19
##
## $`TCGA-UCEC`
##
## Primary Tumor Recurrent Tumor Solid Tissue Normal
## 553 1 35
##
## $`TCGA-PRAD`
##
## Metastatic Primary Tumor Solid Tissue Normal
## 1 501 52
##
## $`TCGA-TGCT`
##
## Additional - New Primary Primary Tumor
## 6 150
##
## $`TCGA-LUSC`
##
## Primary Tumor Solid Tissue Normal
## 511 51
smpls[[1]]
##
## Metastatic Primary Tumor Solid Tissue Normal
## 7 1111 113
posIDs <- c("TCGA-DLBC","TCGA-LUAD","TCGA-COAD","TCGA-BRCA")
as_tibble(projects[projects$id %in% posIDs, ])
## # A tibble: 4 × 10
## id primary_site dbgap_accession_number project_id disease_type name
## <chr> <list> <chr> <chr> <list> <chr>
## 1 TCGA-BRCA <chr [1]> <NA> TCGA-BRCA <chr [9]> Breast …
## 2 TCGA-LUAD <chr [1]> <NA> TCGA-LUAD <chr [4]> Lung Ad…
## 3 TCGA-DLBC <chr [14]> <NA> TCGA-DLBC <chr [2]> Lymphoi…
## 4 TCGA-COAD <chr [2]> <NA> TCGA-COAD <chr [4]> Colon A…
## # ℹ 4 more variables: releasable <lgl>, state <chr>, released <lgl>,
## # tumor <chr>
smpls[names(smpls) %in% posIDs[3:4]]
## $`TCGA-BRCA`
##
## Metastatic Primary Tumor Solid Tissue Normal
## 7 1111 113
##
## $`TCGA-COAD`
##
## Metastatic Primary Tumor Recurrent Tumor Solid Tissue Normal
## 1 481 1 41
TCGAbiolinks:::getProjectSummary("TCGA-BRCA")
## $file_count
## [1] 70776
##
## $data_categories
## file_count case_count data_category
## 1 21134 1098 Simple Nucleotide Variation
## 2 9282 1098 Sequencing Reads
## 3 5317 1098 Biospecimen
## 4 2288 1098 Clinical
## 5 14346 1098 Copy Number Variation
## 6 4876 1097 Transcriptome Profiling
## 7 3714 1097 DNA Methylation
## 8 919 881 Proteome Profiling
## 9 3128 927 Somatic Structural Variation
## 10 5772 1098 Structural Variation
##
## $case_count
## [1] 1098
##
## $file_size
## [1] 6.249966e+14
#### Downloading and prepare TARGET CASE ####
TargetSamples <- GDCquery(project = "TCGA-BRCA",
data.category = "Transcriptome Profiling",
data.type = "Gene Expression Quantification",
workflow.type = "STAR - Counts")
## --------------------------------------
## o GDCquery: Searching in GDC database
## --------------------------------------
## Genome of reference: hg38
## --------------------------------------------
## oo Accessing GDC. This might take a while...
## --------------------------------------------
## ooo Project: TCGA-BRCA
## --------------------
## oo Filtering results
## --------------------
## ooo By data.type
## ooo By workflow.type
## ----------------
## oo Checking data
## ----------------
## ooo Checking if there are duplicated cases
## ooo Checking if there are results for the query
## -------------------
## o Preparing output
## -------------------
#### obtain case information ####
CaseInfo <- getResults(TargetSamples)#, cols = c("cases"))
as_tibble(head(CaseInfo))
## # A tibble: 6 × 30
## id data_format cases access file_name submitter_id data_category type
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 9dc09c86-… TSV TCGA… open d1f1743c… da44e611-ff… Transcriptom… gene…
## 2 95668f0b-… TSV TCGA… open 6365a756… 06cd79cd-aa… Transcriptom… gene…
## 3 461fda5d-… TSV TCGA… open 30285113… dbf87563-f6… Transcriptom… gene…
## 4 30ff778c-… TSV TCGA… open 5167da8c… 7c7193d8-75… Transcriptom… gene…
## 5 427a04c9-… TSV TCGA… open fead73ce… 9248dd2f-37… Transcriptom… gene…
## 6 0682b5b9-… TSV TCGA… open d5066dc8… f123e0b6-4c… Transcriptom… gene…
## # ℹ 22 more variables: platform <chr>, file_size <int>, created_datetime <chr>,
## # md5sum <chr>, updated_datetime <chr>, file_id <chr>, data_type <chr>,
## # state <chr>, experimental_strategy <chr>, version <chr>,
## # data_release <chr>, project <chr>, analysis_id <chr>, analysis_state <chr>,
## # analysis_submitter_id <chr>, analysis_workflow_link <chr>,
## # analysis_workflow_type <chr>, analysis_workflow_version <chr>,
## # sample_type <chr>, is_ffpe <lgl>, cases.submitter_id <chr>, …
#### subset samples so that there is an equal number of cancer and control samples ####
dataPrimary_Target <- TCGAquery_SampleTypes(barcode = CaseInfo$cases, typesample = "TP") # primary tumor
dataNormal_Target <- TCGAquery_SampleTypes(barcode = CaseInfo$cases, typesample = "NT") # normal tissue
dataPrimary_Target <- dataPrimary_Target[1:113]
dataNormal_Target <- dataNormal_Target[1:113]
#### downloaded samples of interest ####
TargetSamples <- GDCquery(project = "TCGA-BRCA",
data.category = "Transcriptome Profiling",
data.type = "Gene Expression Quantification",
workflow.type = "STAR - Counts",
barcode = c(dataPrimary_Target, dataNormal_Target))
## --------------------------------------
## o GDCquery: Searching in GDC database
## --------------------------------------
## Genome of reference: hg38
## --------------------------------------------
## oo Accessing GDC. This might take a while...
## --------------------------------------------
## ooo Project: TCGA-BRCA
## --------------------
## oo Filtering results
## --------------------
## ooo By data.type
## ooo By workflow.type
## ooo By barcode
## ----------------
## oo Checking data
## ----------------
## ooo Checking if there are duplicated cases
## ooo Checking if there are results for the query
## -------------------
## o Preparing output
## -------------------
#### Download the data (Note: Depending on your computer, you may not have enough RAM to process this amount of data.
#### to include 50 cancer and 50 normal tissue samples)
GDCdownload(TargetSamples) # will download 226 files and about 960 MB of data
## Downloading data for project TCGA-BRCA
## Of the 226 files for download 226 already exist.
## All samples have been already downloaded
data <- GDCprepare(TargetSamples)
## | | 0% | |0.4424779% ~16 s remaining| |0.8849558% ~11 s remaining| |1.327434% ~9 s remaining | |1.769912% ~8 s remaining |= |2.212389% ~8 s remaining |= |2.654867% ~7 s remaining |= |3.097345% ~7 s remaining |= |3.539823% ~7 s remaining |== |3.982301% ~7 s remaining |== |4.424779% ~7 s remaining |== |4.867257% ~6 s remaining |== |5.309735% ~6 s remaining |== |5.752212% ~6 s remaining |=== |6.19469% ~6 s remaining |=== |6.637168% ~6 s remaining |=== |7.079646% ~6 s remaining |=== |7.522124% ~6 s remaining |==== |7.964602% ~6 s remaining |==== |8.40708% ~6 s remaining |==== |8.849558% ~6 s remaining |==== |9.292035% ~6 s remaining |===== |9.734513% ~11 s remaining |===== |10.17699% ~11 s remaining |===== |10.61947% ~11 s remaining |===== |11.06195% ~11 s remaining |===== |11.50442% ~10 s remaining |====== |11.9469% ~10 s remaining |====== |12.38938% ~10 s remaining |====== |12.83186% ~10 s remaining |====== |13.27434% ~9 s remaining |======= |13.71681% ~9 s remaining |======= |14.15929% ~9 s remaining |======= |14.60177% ~9 s remaining |======= |15.04425% ~9 s remaining |======== |15.48673% ~9 s remaining |======== |15.9292% ~9 s remaining |======== |16.37168% ~11 s remaining |======== |16.81416% ~11 s remaining |======== |17.25664% ~11 s remaining |========= |17.69912% ~11 s remaining |========= |18.14159% ~10 s remaining |========= |18.58407% ~10 s remaining |========= |19.02655% ~10 s remaining |========== |19.46903% ~10 s remaining |========== |19.9115% ~10 s remaining |========== |20.35398% ~10 s remaining |========== |20.79646% ~10 s remaining |=========== |21.23894% ~9 s remaining |=========== |21.68142% ~9 s remaining |=========== |22.12389% ~9 s remaining |=========== |22.56637% ~9 s remaining |=========== |23.00885% ~9 s remaining |============ |23.45133% ~9 s remaining |============ |23.89381% ~9 s remaining |============ |24.33628% ~10 s remaining |============ |24.77876% ~10 s remaining |============= |25.22124% ~10 s remaining |============= |25.66372% ~10 s remaining |============= |26.10619% ~10 s remaining |============= |26.54867% ~10 s remaining |============== |26.99115% ~9 s remaining |============== |27.43363% ~9 s remaining |============== |27.87611% ~9 s remaining |============== |28.31858% ~9 s remaining |============== |28.76106% ~9 s remaining |=============== |29.20354% ~9 s remaining |=============== |29.64602% ~9 s remaining |=============== |30.0885% ~9 s remaining |=============== |30.53097% ~8 s remaining |================ |30.97345% ~8 s remaining |================ |31.41593% ~8 s remaining |================ |31.85841% ~8 s remaining |================ |32.30088% ~8 s remaining |================= |32.74336% ~8 s remaining |================= |33.18584% ~8 s remaining |================= |33.62832% ~9 s remaining |================= |34.0708% ~9 s remaining |================= |34.51327% ~9 s remaining |================== |34.95575% ~9 s remaining |================== |35.39823% ~8 s remaining |================== |35.84071% ~8 s remaining |================== |36.28319% ~8 s remaining |=================== |36.72566% ~8 s remaining |=================== |37.16814% ~8 s remaining |=================== |37.61062% ~8 s remaining |=================== |38.0531% ~8 s remaining |==================== |38.49558% ~8 s remaining |==================== |38.93805% ~8 s remaining |==================== |39.38053% ~7 s remaining |==================== |39.82301% ~7 s remaining |==================== |40.26549% ~7 s remaining |===================== |40.70796% ~7 s remaining |===================== |41.15044% ~7 s remaining |===================== |41.59292% ~7 s remaining |===================== |42.0354% ~7 s remaining |====================== |42.47788% ~7 s remaining |====================== |42.92035% ~7 s remaining |====================== |43.36283% ~7 s remaining |====================== |43.80531% ~7 s remaining |======================= |44.24779% ~7 s remaining |======================= |44.69027% ~6 s remaining |======================= |45.13274% ~7 s remaining |======================= |45.57522% ~7 s remaining |======================= |46.0177% ~7 s remaining |======================== |46.46018% ~7 s remaining |======================== |46.90265% ~7 s remaining |======================== |47.34513% ~7 s remaining |======================== |47.78761% ~7 s remaining |========================= |48.23009% ~6 s remaining |========================= |48.67257% ~6 s remaining |========================= |49.11504% ~6 s remaining |========================= |49.55752% ~6 s remaining |========================== | 50% ~6 s remaining |========================== |50.44248% ~6 s remaining |========================== |50.88496% ~6 s remaining |========================== |51.32743% ~6 s remaining |========================== |51.76991% ~6 s remaining |=========================== |52.21239% ~6 s remaining |=========================== |52.65487% ~6 s remaining |=========================== |53.09735% ~6 s remaining |=========================== |53.53982% ~6 s remaining |============================ |53.9823% ~5 s remaining |============================ |54.42478% ~5 s remaining |============================ |54.86726% ~5 s remaining |============================ |55.30973% ~5 s remaining |============================ |55.75221% ~5 s remaining |============================= |56.19469% ~5 s remaining |============================= |56.63717% ~5 s remaining |============================= |57.07965% ~5 s remaining |============================= |57.52212% ~5 s remaining |============================== |57.9646% ~5 s remaining |============================== |58.40708% ~5 s remaining |============================== |58.84956% ~5 s remaining |============================== |59.29204% ~5 s remaining |=============================== |59.73451% ~5 s remaining |=============================== |60.17699% ~5 s remaining |=============================== |60.61947% ~5 s remaining |=============================== |61.06195% ~5 s remaining |=============================== |61.50442% ~5 s remaining |================================ |61.9469% ~5 s remaining |================================ |62.38938% ~4 s remaining |================================ |62.83186% ~4 s remaining |================================ |63.27434% ~4 s remaining |================================= |63.71681% ~4 s remaining |================================= |64.15929% ~4 s remaining |================================= |64.60177% ~4 s remaining |================================= |65.04425% ~4 s remaining |================================== |65.48673% ~4 s remaining |================================== |65.9292% ~4 s remaining |================================== |66.37168% ~4 s remaining |================================== |66.81416% ~4 s remaining |================================== |67.25664% ~4 s remaining |=================================== |67.69912% ~4 s remaining |=================================== |68.14159% ~4 s remaining |=================================== |68.58407% ~4 s remaining |=================================== |69.02655% ~4 s remaining |==================================== |69.46903% ~3 s remaining |==================================== |69.9115% ~3 s remaining |==================================== |70.35398% ~3 s remaining |==================================== |70.79646% ~3 s remaining |===================================== |71.23894% ~3 s remaining |===================================== |71.68142% ~3 s remaining |===================================== |72.12389% ~3 s remaining |===================================== |72.56637% ~3 s remaining |===================================== |73.00885% ~3 s remaining |====================================== |73.45133% ~3 s remaining |====================================== |73.89381% ~3 s remaining |====================================== |74.33628% ~3 s remaining |====================================== |74.77876% ~3 s remaining |======================================= |75.22124% ~3 s remaining |======================================= |75.66372% ~3 s remaining |======================================= |76.10619% ~3 s remaining |======================================= |76.54867% ~3 s remaining |======================================== |76.99115% ~3 s remaining |======================================== |77.43363% ~3 s remaining |======================================== |77.87611% ~3 s remaining |======================================== |78.31858% ~2 s remaining |======================================== |78.76106% ~2 s remaining |========================================= |79.20354% ~2 s remaining |========================================= |79.64602% ~2 s remaining |========================================= |80.0885% ~2 s remaining |========================================= |80.53097% ~2 s remaining |========================================== |80.97345% ~2 s remaining |========================================== |81.41593% ~2 s remaining |========================================== |81.85841% ~2 s remaining |========================================== |82.30088% ~2 s remaining |=========================================== |82.74336% ~2 s remaining |=========================================== |83.18584% ~2 s remaining |=========================================== |83.62832% ~2 s remaining |=========================================== |84.0708% ~2 s remaining |=========================================== |84.51327% ~2 s remaining |============================================ |84.95575% ~2 s remaining |============================================ |85.39823% ~2 s remaining |============================================ |85.84071% ~2 s remaining |============================================ |86.28319% ~2 s remaining |============================================= |86.72566% ~1 s remaining |============================================= |87.16814% ~1 s remaining |============================================= |87.61062% ~1 s remaining |============================================= |88.0531% ~1 s remaining |============================================== |88.49558% ~1 s remaining |============================================== |88.93805% ~1 s remaining |============================================== |89.38053% ~1 s remaining |============================================== |89.82301% ~1 s remaining |============================================== |90.26549% ~1 s remaining |=============================================== |90.70796% ~1 s remaining |=============================================== |91.15044% ~1 s remaining |=============================================== |91.59292% ~1 s remaining |=============================================== |92.0354% ~1 s remaining |================================================ |92.47788% ~1 s remaining |================================================ |92.92035% ~1 s remaining |================================================ |93.36283% ~1 s remaining |================================================ |93.80531% ~1 s remaining |================================================= |94.24779% ~1 s remaining |================================================= |94.69027% ~1 s remaining |================================================= |95.13274% ~1 s remaining |================================================= |95.57522% ~0 s remaining |================================================= |96.0177% ~0 s remaining |================================================== |96.46018% ~0 s remaining |================================================== |96.90265% ~0 s remaining |================================================== |97.34513% ~0 s remaining |================================================== |97.78761% ~0 s remaining |=================================================== |98.23009% ~0 s remaining |=================================================== |98.67257% ~0 s remaining |=================================================== |99.11504% ~0 s remaining |=================================================== |99.55752% ~0 s remaining |====================================================|100% ~0 s remaining |====================================================|100% Completed after 11 s
## Starting to add information to samples
## => Add clinical information to samples
## => Adding TCGA molecular information from marker papers
## => Information will have prefix 'paper_'
## brca subtype information from:doi.org/10.1016/j.ccell.2018.03.014
## Available assays in SummarizedExperiment :
## => unstranded
## => stranded_first
## => stranded_second
## => tpm_unstrand
## => fpkm_unstrand
## => fpkm_uq_unstrand
assays(data)
## List of length 6
## names(6): unstranded stranded_first ... fpkm_unstrand fpkm_uq_unstrand
as_tibble(colData(data))
## # A tibble: 226 × 93
## barcode patient sample shortLetterCode definition sample_submitter_id
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 TCGA-EW-A2FS-0… TCGA-E… TCGA-… TP Primary s… TCGA-EW-A2FS-01A
## 2 TCGA-OL-A6VR-0… TCGA-O… TCGA-… TP Primary s… TCGA-OL-A6VR-01A
## 3 TCGA-E9-A226-0… TCGA-E… TCGA-… TP Primary s… TCGA-E9-A226-01A
## 4 TCGA-A8-A08H-0… TCGA-A… TCGA-… TP Primary s… TCGA-A8-A08H-01A
## 5 TCGA-D8-A27H-0… TCGA-D… TCGA-… TP Primary s… TCGA-D8-A27H-01A
## 6 TCGA-D8-A3Z6-0… TCGA-D… TCGA-… TP Primary s… TCGA-D8-A3Z6-01A
## 7 TCGA-B6-A1KN-0… TCGA-B… TCGA-… TP Primary s… TCGA-B6-A1KN-01A
## 8 TCGA-BH-A0DL-0… TCGA-B… TCGA-… TP Primary s… TCGA-BH-A0DL-01A
## 9 TCGA-A8-A09X-0… TCGA-A… TCGA-… TP Primary s… TCGA-A8-A09X-01A
## 10 TCGA-BH-A2L8-0… TCGA-B… TCGA-… TP Primary s… TCGA-BH-A2L8-01A
## # ℹ 216 more rows
## # ℹ 87 more variables: tumor_descriptor <chr>, sample_id <chr>,
## # pathology_report_uuid <chr>, submitter_id <chr>, sample_type <chr>,
## # specimen_type <chr>, days_to_collection <int>, state <chr>,
## # initial_weight <dbl>, tissue_type <chr>, preservation_method <chr>,
## # synchronous_malignancy <chr>, ajcc_pathologic_stage <chr>,
## # days_to_diagnosis <int>, laterality <chr>, treatments <list>, …
as_tibble(rowData(data))
## # A tibble: 60,660 × 10
## source type score phase gene_id gene_type gene_name level hgnc_id
## <fct> <fct> <dbl> <int> <chr> <chr> <chr> <chr> <chr>
## 1 HAVANA gene NA NA ENSG00000000003.15 protein_… TSPAN6 2 HGNC:1…
## 2 HAVANA gene NA NA ENSG00000000005.6 protein_… TNMD 2 HGNC:1…
## 3 HAVANA gene NA NA ENSG00000000419.13 protein_… DPM1 2 HGNC:3…
## 4 HAVANA gene NA NA ENSG00000000457.14 protein_… SCYL3 2 HGNC:1…
## 5 HAVANA gene NA NA ENSG00000000460.17 protein_… C1orf112 2 HGNC:2…
## 6 HAVANA gene NA NA ENSG00000000938.13 protein_… FGR 2 HGNC:3…
## 7 HAVANA gene NA NA ENSG00000000971.16 protein_… CFH 1 HGNC:4…
## 8 HAVANA gene NA NA ENSG00000001036.14 protein_… FUCA2 2 HGNC:4…
## 9 HAVANA gene NA NA ENSG00000001084.13 protein_… GCLC 1 HGNC:4…
## 10 HAVANA gene NA NA ENSG00000001167.14 protein_… NFYA 2 HGNC:7…
## # ℹ 60,650 more rows
## # ℹ 1 more variable: havana_gene <chr>
table(rowData(data)$gene_type)
##
## IG_C_gene IG_C_pseudogene
## 14 9
## IG_D_gene IG_J_gene
## 37 18
## IG_J_pseudogene IG_pseudogene
## 3 1
## IG_V_gene IG_V_pseudogene
## 145 187
## lncRNA miRNA
## 16901 1881
## misc_RNA Mt_rRNA
## 2212 2
## Mt_tRNA polymorphic_pseudogene
## 22 48
## processed_pseudogene protein_coding
## 10167 19962
## pseudogene ribozyme
## 18 8
## rRNA rRNA_pseudogene
## 47 497
## scaRNA scRNA
## 49 1
## snoRNA snRNA
## 943 1901
## sRNA TEC
## 5 1057
## TR_C_gene TR_D_gene
## 6 4
## TR_J_gene TR_J_pseudogene
## 79 4
## TR_V_gene TR_V_pseudogene
## 106 33
## transcribed_processed_pseudogene transcribed_unitary_pseudogene
## 500 138
## transcribed_unprocessed_pseudogene translated_processed_pseudogene
## 939 2
## translated_unprocessed_pseudogene unitary_pseudogene
## 1 98
## unprocessed_pseudogene vault_RNA
## 2614 1
SECoding <- data[rowData(data)$gene_type == "protein_coding", ]
#### The following function will return the data from specified slots in the summarizedExperiment object ####
dataPrep_Coding <- TCGAanalyze_Preprocessing(
object = SECoding,
cor.cut = 0.6,
datatype = "fpkm_unstrand"
)
## Number of outliers: 0
boxplot(dataPrep_Coding, outline = FALSE)

dataNorm_Coding <- TCGAanalyze_Normalization(
tabDF = dataPrep_Coding,
geneInfo = geneInfoHT,
method = "geneLength"
)
## I Need about 55 seconds for this Complete Normalization Upper Quantile [Processing 80k elements /s]
## Step 1 of 4: newSeqExpressionSet ...
## Step 2 of 4: withinLaneNormalization ...
## Step 3 of 4: betweenLaneNormalization ...
## Step 4 of 4: exprs ...
dataFilt_Coding <- TCGAanalyze_Filtering(
tabDF = dataPrep_Coding,
method = "quantile",
qnt.cut = 0.25
)
boxplot(dataNorm_Coding, outline = FALSE)

DEGsCoding <- TCGAanalyze_DEA(mat1 = dataFilt_Coding[,dataNormal_Target],
mat2 = dataFilt_Coding[,dataPrimary_Target],
pipeline="limma",
Cond1type = "Normal",
Cond2type = "Tumor",
fdr.cut = 0.01 ,
logFC.cut = 1,
method = "glmLRT", ClinicalDF = data.frame())
## Batch correction skipped since no factors provided
## ----------------------- DEA -------------------------------
## o 113 samples in Cond1type Normal
## o 113 samples in Cond2type Tumor
## o 14971 features as miRNA or genes
## This may take some minutes...

## ----------------------- END DEA -------------------------------
head(DEGsCoding)
## logFC AveExpr t P.Value adj.P.Val
## ENSG00000187824.9 -3.617888 2.806241 -29.66605 1.111120e-79 1.663458e-75
## ENSG00000136158.12 -17.360418 13.400130 -25.30245 1.021027e-67 7.642895e-64
## ENSG00000148053.17 -19.251799 12.448263 -24.58882 1.159118e-65 5.784384e-62
## ENSG00000132561.14 -25.387601 18.288332 -23.61005 8.466446e-63 3.168779e-59
## ENSG00000177098.9 -7.558010 5.750185 -23.38276 3.981548e-62 1.192155e-58
## ENSG00000154065.17 -3.069429 2.404372 -23.02268 4.686785e-61 1.169431e-57
## B
## ENSG00000187824.9 170.8503
## ENSG00000136158.12 143.6198
## ENSG00000148053.17 138.9340
## ENSG00000132561.14 132.4016
## ENSG00000177098.9 130.8673
## ENSG00000154065.17 128.4233