# Query GDC for the DNA methylation files of the primary tumors in `project`.
query <- GDCquery(
  project = project,
  data.category = "DNA Methylation",
  sample.type = "Primary Tumor"
)

# Keep the case barcode and the array platform of every returned file.
samples <- query$results[[1]][, c("cases", "platform")]

# The first 12 characters of `cases` are stored as the patient barcode.
samples$bcr_patient_barcode <- substr(samples$cases, 1, 12)

# Patient barcodes split by methylation platform.
samples.450k <- samples %>%
  dplyr::filter(platform == "Illumina Human Methylation 450") %>%
  dplyr::pull(bcr_patient_barcode)
samples.27k <- samples %>%
  dplyr::filter(platform == "Illumina Human Methylation 27") %>%
  dplyr::pull(bcr_patient_barcode)
# Query the DNA methylation files of the solid tissue normal samples.
query.normal <- GDCquery(
  project = project,
  data.category = "DNA Methylation",
  sample.type = "Solid Tissue Normal"
)

# Flag each tumor sample with "Yes" when its patient barcode (first 12
# characters of the case barcode) also appears in the normal query, else "No".
samples$has_matched_normal <- ifelse(
  samples$bcr_patient_barcode %in%
    substr(query.normal$results[[1]]$cases, 1, 12),
  "Yes",
  "No"
)
# Query the FPKM-UQ gene expression files of the primary tumors.
# Uses `project` like every other query in this script instead of a
# hard-coded project id.
query.exp <- GDCquery(
  project = project,
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - FPKM-UQ",
  sample.type = "Primary Tumor"
)

# Read the results of the expression query (`query.exp`). Reading `query`
# here would return the methylation samples again, and every later
# intersection with `samples.exp` would be a no-op.
samples.exp <- query.exp$results[[1]]
samples.exp$bcr_patient_barcode <- substr(samples.exp$cases, 1, 12)

# Reduce to the vector of patient barcodes that have expression data.
samples.exp <- samples.exp %>% dplyr::pull(bcr_patient_barcode)
# Fetch the clinical supplement of `project` in BCR Biotab format.
query.clin <- GDCquery(
  project = project,
  data.category = "Clinical",
  data.type = "Clinical Supplement",
  data.format = "BCR Biotab"
)

# Download the files, then load them; the individual clinical tables are
# accessed by name from `clin` further down.
GDCdownload(query.clin)
clin <- GDCprepare(query.clin)
## Warning: Duplicated column names deduplicated: 'history_other_malignancy' =>
## 'history_other_malignancy_1' [44]
## [1] "clinical_follow_up_v1.0_coad" "clinical_patient_coad"
## [3] "clinical_radiation_coad" "clinical_nte_coad"
## [5] "clinical_drug_coad" "clinical_omf_v4.0_coad"
## [7] "clinical_follow_up_v1.0_nte_coad"
# Drug table of the clinical supplement, with drug names in sentence case so
# that spellings differing only in capitalisation collapse together.
drug <- clin$clinical_drug_coad
drug$pharmaceutical_therapy_drug_name <- stringr::str_to_sentence(
  drug$pharmaceutical_therapy_drug_name
)

# Print the sorted list of distinct drug names for manual inspection.
drug$pharmaceutical_therapy_drug_name %>%
  stringr::str_to_sentence() %>%
  unique() %>%
  sort()
## [1] "[Not available]"
## [2] "5 fu"
## [3] "5- fu"
## [4] "5-fluorouracil"
## [5] "5-fluorouracil + leucovorin"
## [6] "5-fluoruoracil"
## [7] "5-fu"
## [8] "5fu"
## [9] "Avastin"
## [10] "Bevacizumab"
## [11] "C1 folfiri/zaltrap"
## [12] "Calcium foliatum, fluorouracilum, oxaliplatinum, dexamethassone"
## [13] "Camptosar"
## [14] "Capecitabine"
## [15] "Cde_id:2975232"
## [16] "Cetuximab"
## [17] "Cetuximab study drug"
## [18] "Cpt-11"
## [19] "Dexamethasone"
## [20] "Drug_name"
## [21] "Erbitux"
## [22] "Filgrastim (g-csf)"
## [23] "Floxuridine"
## [24] "Fluorouracil"
## [25] "Folfiri"
## [26] "Folfiri/avastin"
## [27] "Folfox"
## [28] "Folinic acid"
## [29] "Irinotecan"
## [30] "Irinotecan + cetuximab"
## [31] "Irinotecan hcl"
## [32] "Leucovorin"
## [33] "Leucovorin calcium"
## [34] "Levcovorin"
## [35] "Mayo 425-20"
## [36] "Mitomycin"
## [37] "Mitomycin c"
## [38] "Oxaliplatin"
## [39] "Oxaliplatin, folinic acid, fluorouracil"
## [40] "Oxaliplatinum+ 5-fu"
## [41] "Oxaliplatinum+5 fluorouracilum"
## [42] "Panitumumab"
## [43] "Pegfilgrastim"
## [44] "Pegfilgrastim (peg g-csf)"
## [45] "Raltitrexed"
## [46] "Regorafenib"
## [47] "Study drug amg 655"
## [48] "Xeloda"
# Collapse the listed spelling variants of 5-FU onto one canonical label.
idx <- which(
  drug$pharmaceutical_therapy_drug_name %in% c(
    "5-fu",
    "5 fu",
    "5- fu",
    "5-fluoruoracil",
    "5-fluorouracil",
    "5fu"
  )
)
drug$pharmaceutical_therapy_drug_name[idx] <- "5-fluorouracil"
# Attach the drug records to every methylation sample. `all.x = TRUE` keeps
# samples that have no drug record. Columns 1, 3, 4 and 5 of `drug` are
# dropped by position before merging.
# NOTE(review): presumably these are identifier/date columns -- confirm
# against the drug table's column order.
samples.with.drug.info <- merge(
  samples,
  drug[, -c(1, 3, 4, 5)],
  all.x = TRUE
)

# Add the AJCC pathologic stage from the patient table.
samples.with.drug.info.and.stage <- merge(
  samples.with.drug.info,
  clin$clinical_patient_coad[
    , c("bcr_patient_barcode", "ajcc_pathologic_tumor_stage")
  ],
  all.x = TRUE
)

# Collapse sub-stages onto their main stage ("Stage IIA" -> "Stage II").
# Only a trailing A/B/C is removed: stripping those letters anywhere in the
# string would also mangle non-stage values such as "[Not Available]".
samples.with.drug.info.and.stage$tumor_stage <- sub(
  "[ABC]$",
  "",
  samples.with.drug.info.and.stage$ajcc_pathologic_tumor_stage
)

# Export the sample/drug table (without the stage columns).
write.csv(
  samples.with.drug.info,
  file = paste0(project, "_samples_with_drug_info.csv")
)
# Drug records of patients with methylation data; print how often each drug
# name containing "rouracil" occurs.
drug.with.met <- drug[
  drug$bcr_patient_barcode %in% samples$bcr_patient_barcode,
]
drug.with.met$pharmaceutical_therapy_drug_name %>%
  plyr::count() %>%
  dplyr::filter(grepl("rouracil", x))

# Same count, restricted to patients that are also in `samples.exp`.
drug.with.met <- drug[
  drug$bcr_patient_barcode %in%
    intersect(samples$bcr_patient_barcode, samples.exp),
]
drug.with.met$pharmaceutical_therapy_drug_name %>%
  plyr::count() %>%
  dplyr::filter(grepl("rouracil", x))

# Same count, restricted to patients in `samples.stage_ii_and_iii`.
# NOTE(review): `samples.stage_ii_and_iii` is not defined in the visible part
# of this script -- confirm it exists before this line runs.
drug.with.met <- drug[
  drug$bcr_patient_barcode %in%
    intersect(samples$bcr_patient_barcode, samples.stage_ii_and_iii),
]
drug.with.met$pharmaceutical_therapy_drug_name %>%
  plyr::count() %>%
  dplyr::filter(grepl("rouracil", x))
# Interactive table: number of distinct patient/drug/stage combinations on the
# 450k platform, counted per platform, drug name and tumor stage.
samples.with.drug.info.and.stage %>%
  dplyr::filter(platform == "Illumina Human Methylation 450") %>%
  dplyr::select(
    c(
      "bcr_patient_barcode",
      "platform",
      "pharmaceutical_therapy_drug_name",
      "tumor_stage"
    )
  ) %>%
  unique() %>%
  plyr::count(
    vars = c("platform", "pharmaceutical_therapy_drug_name", "tumor_stage")
  ) %>%
  DT::datatable(
    filter = "top",
    style = "bootstrap",
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      dom = "Bfrtip",
      buttons = I("colvis"),
      keys = TRUE,
      pageLength = 10
    ),
    rownames = FALSE,
    caption = "Samples metadata"
  )
# Interactive table: distinct patient/drug/stage combinations on the 450k
# platform, limited to Stage II and Stage III, counted per platform and drug.
samples.with.drug.info.and.stage %>%
  dplyr::filter(platform == "Illumina Human Methylation 450") %>%
  dplyr::select(
    c(
      "bcr_patient_barcode",
      "platform",
      "pharmaceutical_therapy_drug_name",
      "tumor_stage"
    )
  ) %>%
  unique() %>%
  dplyr::filter(tumor_stage %in% c("Stage II", "Stage III")) %>%
  plyr::count(vars = c("platform", "pharmaceutical_therapy_drug_name")) %>%
  DT::datatable(
    filter = "top",
    style = "bootstrap",
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      dom = "Bfrtip",
      buttons = I("colvis"),
      keys = TRUE,
      pageLength = 10
    ),
    rownames = FALSE,
    caption = "Samples metadata"
  )
# Query GDC for the DNA methylation files of the primary tumors in `project`.
query <- GDCquery(
  project = project,
  data.category = "DNA Methylation",
  sample.type = "Primary Tumor"
)

# Keep the case barcode and the array platform of every returned file.
samples <- query$results[[1]][, c("cases", "platform")]

# The first 12 characters of `cases` are stored as the patient barcode.
samples$bcr_patient_barcode <- substr(samples$cases, 1, 12)

# Patient barcodes split by methylation platform.
samples.450k <- samples %>%
  dplyr::filter(platform == "Illumina Human Methylation 450") %>%
  dplyr::pull(bcr_patient_barcode)
samples.27k <- samples %>%
  dplyr::filter(platform == "Illumina Human Methylation 27") %>%
  dplyr::pull(bcr_patient_barcode)
# Query the DNA methylation files of the solid tissue normal samples.
query.normal <- GDCquery(
  project = project,
  data.category = "DNA Methylation",
  sample.type = "Solid Tissue Normal"
)

# Flag each tumor sample with "Yes" when its patient barcode (first 12
# characters of the case barcode) also appears in the normal query, else "No".
samples$has_matched_normal <- ifelse(
  samples$bcr_patient_barcode %in%
    substr(query.normal$results[[1]]$cases, 1, 12),
  "Yes",
  "No"
)
# Fetch the clinical supplement of `project` in BCR Biotab format.
query.clin <- GDCquery(
  project = project,
  data.category = "Clinical",
  data.type = "Clinical Supplement",
  data.format = "BCR Biotab"
)

# Download the files, then load them; the individual clinical tables are
# accessed by name from `clin` further down.
GDCdownload(query.clin)
clin <- GDCprepare(query.clin)
## Warning: Duplicated column names deduplicated: 'history_other_malignancy' =>
## 'history_other_malignancy_1' [44]
## [1] "clinical_drug_read" "clinical_follow_up_v1.0_read"
## [3] "clinical_patient_read" "clinical_radiation_read"
## [5] "clinical_nte_read" "clinical_omf_v4.0_read"
## [7] "clinical_follow_up_v1.0_nte_read"
# Drug table of the clinical supplement, with drug names in sentence case so
# that spellings differing only in capitalisation collapse together.
drug <- clin$clinical_drug_read
drug$pharmaceutical_therapy_drug_name <- stringr::str_to_sentence(
  drug$pharmaceutical_therapy_drug_name
)

# Print the sorted list of distinct drug names for manual inspection.
drug$pharmaceutical_therapy_drug_name %>%
  stringr::str_to_sentence() %>%
  unique() %>%
  sort()
## [1] "[Not available]"
## [2] "[Unknown]"
## [3] "5 fluorouracil+leucovorin"
## [4] "5 fluorouracilum+leucovorin"
## [5] "5 fu"
## [6] "5-fluorouracil"
## [7] "5-fu"
## [8] "5-fu + leulov"
## [9] "5fluorouracil+leucovorin"
## [10] "5fluorouracil+oxaciplatina+l-folinian discido"
## [11] "5fu"
## [12] "Aflibercept/ placebo study"
## [13] "Avastin"
## [14] "Bevacizumab"
## [15] "Camptosar"
## [16] "Capecitabine"
## [17] "Capecitabine -xeloda"
## [18] "Cde_id:2975232"
## [19] "Drug_name"
## [20] "Etoposide"
## [21] "Fluorouracil"
## [22] "Fluorouracil iv continuous infusion over 46 hours"
## [23] "Folfox"
## [24] "Folinic acid"
## [25] "Fudr (floxuridine)"
## [26] "Gemcitabine"
## [27] "Irinotecan"
## [28] "Irinotecan hcl"
## [29] "Irinotecan+5-fluorouracilim"
## [30] "Leucovorin"
## [31] "Leucovorin calcium"
## [32] "Leuocvorin"
## [33] "Mayo 425-20"
## [34] "Mitomycin"
## [35] "Oxaliplatin"
## [36] "Oxaliplatinum + 5-fu"
## [37] "Oxaliplatinum+5 fluorouracilum"
## [38] "Oxaliplatinum+5-fu"
## [39] "Pegfilgrastim"
## [40] "Polyplatillen"
## [41] "Xeloda"
## [42] "Zoledronic acid"
# Collapse the listed spelling variants of 5-FU onto one canonical label.
idx <- which(
  drug$pharmaceutical_therapy_drug_name %in% c(
    "5-fu",
    "5 fu",
    "5- fu",
    "5-fluoruoracil",
    "5-fluorouracil",
    "5fu"
  )
)
drug$pharmaceutical_therapy_drug_name[idx] <- "5-fluorouracil"
# Attach the drug records to every methylation sample. `all.x = TRUE` keeps
# samples that have no drug record. Columns 1, 3, 4 and 5 of `drug` are
# dropped by position before merging.
# NOTE(review): presumably these are identifier/date columns -- confirm
# against the drug table's column order.
samples.with.drug.info <- merge(
  samples,
  drug[, -c(1, 3, 4, 5)],
  all.x = TRUE
)

# Add the AJCC pathologic stage from the patient table.
samples.with.drug.info.and.stage <- merge(
  samples.with.drug.info,
  clin$clinical_patient_read[
    , c("bcr_patient_barcode", "ajcc_pathologic_tumor_stage")
  ],
  all.x = TRUE
)

# Collapse sub-stages onto their main stage ("Stage IIA" -> "Stage II").
# Only a trailing A/B/C is removed: stripping those letters anywhere in the
# string would also mangle non-stage values such as "[Not Available]".
samples.with.drug.info.and.stage$tumor_stage <- sub(
  "[ABC]$",
  "",
  samples.with.drug.info.and.stage$ajcc_pathologic_tumor_stage
)

# Export the sample/drug table (without the stage columns).
write.csv(
  samples.with.drug.info,
  file = paste0(project, "_samples_with_drug_info.csv")
)
# Drug records of patients with methylation data; print how often each drug
# name containing "rouracil" occurs.
drug.with.met <- drug[
  drug$bcr_patient_barcode %in% samples$bcr_patient_barcode,
]
drug.with.met$pharmaceutical_therapy_drug_name %>%
  plyr::count() %>%
  dplyr::filter(grepl("rouracil", x))

# Same count, restricted to patients in `samples.stage_ii_and_iii`.
# NOTE(review): `samples.stage_ii_and_iii` is not defined in the visible part
# of this script -- confirm it exists before this line runs.
drug.with.met <- drug[
  drug$bcr_patient_barcode %in%
    intersect(samples$bcr_patient_barcode, samples.stage_ii_and_iii),
]
drug.with.met$pharmaceutical_therapy_drug_name %>%
  plyr::count() %>%
  dplyr::filter(grepl("rouracil", x))
# Interactive table: number of distinct patient/drug/stage combinations on the
# 450k platform, counted per platform, drug name and tumor stage.
samples.with.drug.info.and.stage %>%
  dplyr::filter(platform == "Illumina Human Methylation 450") %>%
  dplyr::select(
    c(
      "bcr_patient_barcode",
      "platform",
      "pharmaceutical_therapy_drug_name",
      "tumor_stage"
    )
  ) %>%
  unique() %>%
  plyr::count(
    vars = c("platform", "pharmaceutical_therapy_drug_name", "tumor_stage")
  ) %>%
  DT::datatable(
    filter = "top",
    style = "bootstrap",
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      dom = "Bfrtip",
      buttons = I("colvis"),
      keys = TRUE,
      pageLength = 10
    ),
    rownames = FALSE,
    caption = "Samples metadata"
  )
# Interactive table: distinct patient/drug/stage combinations on the 450k
# platform, limited to Stage II and Stage III, counted per platform and drug.
samples.with.drug.info.and.stage %>%
  dplyr::filter(platform == "Illumina Human Methylation 450") %>%
  dplyr::select(
    c(
      "bcr_patient_barcode",
      "platform",
      "pharmaceutical_therapy_drug_name",
      "tumor_stage"
    )
  ) %>%
  unique() %>%
  dplyr::filter(tumor_stage %in% c("Stage II", "Stage III")) %>%
  plyr::count(vars = c("platform", "pharmaceutical_therapy_drug_name")) %>%
  DT::datatable(
    filter = "top",
    style = "bootstrap",
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      dom = "Bfrtip",
      buttons = I("colvis"),
      keys = TRUE,
      pageLength = 10
    ),
    rownames = FALSE,
    caption = "Samples metadata"
  )