rm(list = ls())
library(cgdsr)
## Please send questions to cbioportal@googlegroups.com
library(DT)
##Get list of cancer studies at server
mycgds <- CGDS("http://www.cbioportal.org/")
test(mycgds)
## getCancerStudies... OK
## getCaseLists (1/2) ... OK
## getCaseLists (2/2) ... OK
## getGeneticProfiles (1/2) ... OK
## getGeneticProfiles (2/2) ... OK
## getClinicalData (1/1) ... OK
## getProfileData (1/6) ... OK
## getProfileData (2/6) ... OK
## getProfileData (3/6) ... OK
## getProfileData (4/6) ... OK
## getProfileData (5/6) ... OK
## getProfileData (6/6) ... OK
all_TCGA_studies <- getCancerStudies(mycgds)
DT::datatable(all_TCGA_studies)
#install.packages("xfun")
#same to http://www.cbioportal.org/datasets
##search the dataset you are intested in
hnscc2018 <- "hnsc_tcga_pan_can_atlas_2018"
############################case_list
getCaseLists(mycgds,hnscc2018)[,c(1,2)]
## case_list_id
## 1 hnsc_tcga_pan_can_atlas_2018_all
## 2 hnsc_tcga_pan_can_atlas_2018_3way_complete
## 3 hnsc_tcga_pan_can_atlas_2018_cna
## 4 hnsc_tcga_pan_can_atlas_2018_log2CNA
## 5 hnsc_tcga_pan_can_atlas_2018_microbiome
## 6 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna
## 7 hnsc_tcga_pan_can_atlas_2018_cnaseq
## 8 hnsc_tcga_pan_can_atlas_2018_sequenced
## 9 hnsc_tcga_pan_can_atlas_2018_rppa
## case_list_name
## 1 All samples
## 2 Complete samples
## 3 Samples with CNA data
## 4 Samples with log2 copy-number data
## 5 Samples with microbiome data
## 6 Samples with mRNA data (RNA Seq V2)
## 7 Samples with mutation and CNA data
## 8 Samples with mutation data
## 9 Samples with protein data (RPPA)
dim(getCaseLists(mycgds,hnscc2018)) #9 5
## [1] 9 5
#View(head(getCaseLists(mycgds,hnscc2018),2))
#############################gene_list
getGeneticProfiles(mycgds,hnscc2018)[,c(1,2)]
## genetic_profile_id
## 1 hnsc_tcga_pan_can_atlas_2018_rppa
## 2 hnsc_tcga_pan_can_atlas_2018_rppa_Zscores
## 3 hnsc_tcga_pan_can_atlas_2018_gistic
## 4 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna
## 5 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna_median_Zscores
## 6 hnsc_tcga_pan_can_atlas_2018_log2CNA
## 7 hnsc_tcga_pan_can_atlas_2018_mutations
## 8 hnsc_tcga_pan_can_atlas_2018_fusion
## 9 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna_median_all_sample_Zscores
## 10 hnsc_tcga_pan_can_atlas_2018_microbiome_signature
## 11 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna_median_all_sample_ref_normal_Zscores
## genetic_profile_name
## 1 Protein expression (RPPA)
## 2 Protein expression z-scores (RPPA)
## 3 Putative copy-number alterations from GISTIC
## 4 mRNA Expression, RSEM (Batch normalized from Illumina HiSeq_RNASeqV2)
## 5 mRNA expression z-scores relative to diploid samples (RNA Seq V2 RSEM)
## 6 Log2 copy-number values
## 7 Mutations
## 8 Fusions
## 9 mRNA expression z-scores relative to all samples (log RNA Seq V2 RSEM)
## 10 Microbiome Signatures (log RNA Seq CPM)
## 11 mRNA expression z-scores relative to normal samples (log RNA Seq V2 RSEM)
dim(getGeneticProfiles(mycgds,hnscc2018)) #11 6
## [1] 11 6
##choose a gene list you are interested in
choose_genes <- c("ACHE")
mycaselist <- getCaseLists(mycgds,hnscc2018)[grep("mrna",getCaseLists(mycgds,hnscc2018)[,1]),1]
mycaselist
## [1] "hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna"
mygeneticprofile <- getGeneticProfiles(mycgds,hnscc2018)[grep("rna_seq_v2_mrna$",getGeneticProfiles(mycgds,hnscc2018)[,1]),1]
getGeneticProfiles(mycgds,hnscc2018)[,c(1:2)]
## genetic_profile_id
## 1 hnsc_tcga_pan_can_atlas_2018_rppa
## 2 hnsc_tcga_pan_can_atlas_2018_rppa_Zscores
## 3 hnsc_tcga_pan_can_atlas_2018_gistic
## 4 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna
## 5 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna_median_Zscores
## 6 hnsc_tcga_pan_can_atlas_2018_log2CNA
## 7 hnsc_tcga_pan_can_atlas_2018_mutations
## 8 hnsc_tcga_pan_can_atlas_2018_fusion
## 9 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna_median_all_sample_Zscores
## 10 hnsc_tcga_pan_can_atlas_2018_microbiome_signature
## 11 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna_median_all_sample_ref_normal_Zscores
## genetic_profile_name
## 1 Protein expression (RPPA)
## 2 Protein expression z-scores (RPPA)
## 3 Putative copy-number alterations from GISTIC
## 4 mRNA Expression, RSEM (Batch normalized from Illumina HiSeq_RNASeqV2)
## 5 mRNA expression z-scores relative to diploid samples (RNA Seq V2 RSEM)
## 6 Log2 copy-number values
## 7 Mutations
## 8 Fusions
## 9 mRNA expression z-scores relative to all samples (log RNA Seq V2 RSEM)
## 10 Microbiome Signatures (log RNA Seq CPM)
## 11 mRNA expression z-scores relative to normal samples (log RNA Seq V2 RSEM)
mygeneticprofile
## [1] "hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna"
#View(getGeneticProfiles(mycgds,hnscc2018))
mygeneticprofile
## [1] "hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna"
expr <- getProfileData(mycgds,choose_genes,
mygeneticprofile,mycaselist)
#head(expr)
#View(expr)
DT::datatable(expr)
dim(expr) #[1] 515 2
## [1] 515 1
getCaseLists(mycgds,hnscc2018)[,c(1,2)]
## case_list_id
## 1 hnsc_tcga_pan_can_atlas_2018_all
## 2 hnsc_tcga_pan_can_atlas_2018_3way_complete
## 3 hnsc_tcga_pan_can_atlas_2018_cna
## 4 hnsc_tcga_pan_can_atlas_2018_log2CNA
## 5 hnsc_tcga_pan_can_atlas_2018_microbiome
## 6 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna
## 7 hnsc_tcga_pan_can_atlas_2018_cnaseq
## 8 hnsc_tcga_pan_can_atlas_2018_sequenced
## 9 hnsc_tcga_pan_can_atlas_2018_rppa
## case_list_name
## 1 All samples
## 2 Complete samples
## 3 Samples with CNA data
## 4 Samples with log2 copy-number data
## 5 Samples with microbiome data
## 6 Samples with mRNA data (RNA Seq V2)
## 7 Samples with mutation and CNA data
## 8 Samples with mutation data
## 9 Samples with protein data (RPPA)
mycaselist_m <- getCaseLists(mycgds,hnscc2018)[8,1]
mycaselist_m
## [1] "hnsc_tcga_pan_can_atlas_2018_sequenced"
getGeneticProfiles(mycgds,hnscc2018)[, c(1,2)]
## genetic_profile_id
## 1 hnsc_tcga_pan_can_atlas_2018_rppa
## 2 hnsc_tcga_pan_can_atlas_2018_rppa_Zscores
## 3 hnsc_tcga_pan_can_atlas_2018_gistic
## 4 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna
## 5 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna_median_Zscores
## 6 hnsc_tcga_pan_can_atlas_2018_log2CNA
## 7 hnsc_tcga_pan_can_atlas_2018_mutations
## 8 hnsc_tcga_pan_can_atlas_2018_fusion
## 9 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna_median_all_sample_Zscores
## 10 hnsc_tcga_pan_can_atlas_2018_microbiome_signature
## 11 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna_median_all_sample_ref_normal_Zscores
## genetic_profile_name
## 1 Protein expression (RPPA)
## 2 Protein expression z-scores (RPPA)
## 3 Putative copy-number alterations from GISTIC
## 4 mRNA Expression, RSEM (Batch normalized from Illumina HiSeq_RNASeqV2)
## 5 mRNA expression z-scores relative to diploid samples (RNA Seq V2 RSEM)
## 6 Log2 copy-number values
## 7 Mutations
## 8 Fusions
## 9 mRNA expression z-scores relative to all samples (log RNA Seq V2 RSEM)
## 10 Microbiome Signatures (log RNA Seq CPM)
## 11 mRNA expression z-scores relative to normal samples (log RNA Seq V2 RSEM)
mygeneticprofile_m <- getGeneticProfiles(mycgds,hnscc2018)[7,1]
mygeneticprofile_m
## [1] "hnsc_tcga_pan_can_atlas_2018_mutations"
DT::datatable(getGeneticProfiles(mycgds,hnscc2018))
mut_df <- getProfileData(mycgds,choose_genes,
mygeneticprofile_m,mycaselist_m)
#View(mut_df)
#head(mut_df)
DT::datatable(mut_df)
dim(mut_df) #[1] 515 2
## [1] 515 1
length(intersect(row.names(expr),row.names(mut_df)))
## [1] 507
mut_df <- apply(mut_df,2,as.factor)
##remove “NAN”
mut_df[mut_df == "NaN"] = ""
##remove“NA”
mut_df[is.na(mut_df)] = ""
mut_df[mut_df != ''] = "MUT"
dim(mut_df) #[1] 515 2
## [1] 515 1
head(mut_df)
## ACHE
## TCGA.4P.AA8J.01 ""
## TCGA.BA.4074.01 ""
## TCGA.BA.4076.01 ""
## TCGA.BA.4078.01 ""
## TCGA.BA.5149.01 ""
## TCGA.BA.5151.01 ""
mycaselist_c <- getCaseLists(mycgds,hnscc2018)[3,1]
mycaselist_c
## [1] "hnsc_tcga_pan_can_atlas_2018_cna"
mygeneticprofile_c <- getGeneticProfiles(mycgds,hnscc2018)[3,1]
getGeneticProfiles(mycgds,hnscc2018)[,c(1,2)]
## genetic_profile_id
## 1 hnsc_tcga_pan_can_atlas_2018_rppa
## 2 hnsc_tcga_pan_can_atlas_2018_rppa_Zscores
## 3 hnsc_tcga_pan_can_atlas_2018_gistic
## 4 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna
## 5 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna_median_Zscores
## 6 hnsc_tcga_pan_can_atlas_2018_log2CNA
## 7 hnsc_tcga_pan_can_atlas_2018_mutations
## 8 hnsc_tcga_pan_can_atlas_2018_fusion
## 9 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna_median_all_sample_Zscores
## 10 hnsc_tcga_pan_can_atlas_2018_microbiome_signature
## 11 hnsc_tcga_pan_can_atlas_2018_rna_seq_v2_mrna_median_all_sample_ref_normal_Zscores
## genetic_profile_name
## 1 Protein expression (RPPA)
## 2 Protein expression z-scores (RPPA)
## 3 Putative copy-number alterations from GISTIC
## 4 mRNA Expression, RSEM (Batch normalized from Illumina HiSeq_RNASeqV2)
## 5 mRNA expression z-scores relative to diploid samples (RNA Seq V2 RSEM)
## 6 Log2 copy-number values
## 7 Mutations
## 8 Fusions
## 9 mRNA expression z-scores relative to all samples (log RNA Seq V2 RSEM)
## 10 Microbiome Signatures (log RNA Seq CPM)
## 11 mRNA expression z-scores relative to normal samples (log RNA Seq V2 RSEM)
mygeneticprofile_c
## [1] "hnsc_tcga_pan_can_atlas_2018_gistic"
cna <- getProfileData(mycgds,choose_genes,
mygeneticprofile_c,mycaselist_c)
DT::datatable(cna)
##deal with cna
rn <- rownames(cna)
cna <- apply(cna,2,function(x)
as.character(factor(x, levels = c(-2:2),
labels = c("HOMDEL", "HETLOSS", "DIPLOID", "GAIN", "AMP"))))
cna[is.na(cna)] = ""
cna[cna == 'DIPLOID'] = ""
rownames(cna)=rn
DT::datatable(cna)
mycaselist_cli <- getCaseLists(mycgds,hnscc2018)[1,1]
mycaselist_cli
## [1] "hnsc_tcga_pan_can_atlas_2018_all"
myclinicaldata <- getClinicalData(mycgds,mycaselist_cli)
DT::datatable(myclinicaldata)
##############################choose a gene list you are interested in
#10 here
choose_columns=c('AGE','AJCC_PATHOLOGIC_TUMOR_STAGE','DFS_MONTHS','DFS_STATUS',
'GRADE','NEW_TUMOR_EVENT_AFTER_INITIAL_TREATMENT',
'OS_STATUS','OS_MONTHS','RADIATION_THERAPY','SEX')
choose_clinicaldata <- myclinicaldata[,choose_columns]
dim(choose_clinicaldata) #[1] 523 10
## [1] 523 10
DT::datatable(choose_clinicaldata)
##############################survival analysis
library(survival)
dat <- choose_clinicaldata[choose_clinicaldata$OS_MONTHS >0,]
table(dat$OS_STATUS)
##
## 0:LIVING 1:DECEASED
## 303 219
DT::datatable(dat)
#create the survival object, 'DECEASED' means 'occurrence'(估计KM生存曲线)
my.surv <- Surv(dat$OS_MONTHS,dat$OS_STATUS =='1:DECEASED')
dim(my.surv)
## [1] 523 2
head(my.surv)
## [1] 55.856922+ 14.465595+ 12.690272+ 9.599895 14.662853 89.325048
kmfit1 <- survfit(my.surv ~ 1)
library("survminer")
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'tibble':
## method from
## format.tbl pillar
## print.tbl pillar
## Loading required package: ggpubr
ggsurvplot(kmfit1,data = dat)

my.surv <- Surv(dat$OS_MONTHS,dat$OS_STATUS=='1:DECEASED')
kmfit2 <- survfit(my.surv~dat$SEX) #这里只需要将上面的1换成dat$SEX
ggsurvplot(kmfit2,data = dat)

survdiff(my.surv~dat$SEX) #sig
## Call:
## survdiff(formula = my.surv ~ dat$SEX)
##
## n=522, 1 observation deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## dat$SEX=Female 141 71 56.3 3.86 5.25
## dat$SEX=Male 381 148 162.7 1.34 5.25
##
## Chisq= 5.2 on 1 degrees of freedom, p= 0.02
ggsurvplot(kmfit2,data = dat,pval=T)

kmfit3 <- survfit(my.surv~dat$AJCC_PATHOLOGIC_TUMOR_STAGE)
ggsurvplot(kmfit3,data = dat,pval=T)

length(intersect(rownames(choose_clinicaldata),rownames(mut_df)))
## [1] 515
dat2 <- cbind(choose_clinicaldata[rownames(mut_df),c('OS_STATUS','OS_MONTHS')],mut_df)
dat2 <- dat2[dat2$OS_MONTHS > 0,]
dat2 <- dat2[!is.na(dat$OS_STATUS),]
dat2$OS_STATUS <- as.character(dat2$OS_STATUS)
attach(dat2)
DT::datatable(dat2)
###
my.surv <- Surv(OS_MONTHS,OS_STATUS=='1:DECEASED')
kmfit4 <- survfit(my.surv~ACHE,data = dat2)
library("survminer")
ggsurvplot(kmfit4,conf.int =F, pval = T,risk.table =T, ncensor.plot = TRUE,
title="Survival plot by ACHE Mutaiton")
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.

detach(dat2)
length(intersect(rownames(choose_clinicaldata),rownames(cna)))
## [1] 517
dat <- cbind(choose_clinicaldata[rownames(cna),c('OS_STATUS','OS_MONTHS')],cna)
dat <- dat[dat$OS_MONTHS > 0,]
dat <- dat[!is.na(dat$OS_STATUS),]
dat$OS_STATUS <- as.character(dat$OS_STATUS)
attach(dat)
DT::datatable(dat)
my.surv <- Surv(OS_MONTHS,OS_STATUS=='1:DECEASED')
kmfit7 <- survfit(my.surv~ACHE,data = dat)
ggsurvplot(kmfit7,conf.int =F, pval = T,risk.table =T, ncensor.plot = TRUE,
title="Survival plot by CNA")
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.

library(survival)
expr$ACHE_1 <- expr$ACHE
dat3=cbind(choose_clinicaldata[,c('OS_STATUS','OS_MONTHS')],expr[rownames(choose_clinicaldata),])
dat3=dat3[dat3$OS_MONTHS > 0,]
dat3=dat3[!is.na(dat3$OS_STATUS),]
dat3$OS_STATUS=as.character(dat3$OS_STATUS)
DT::datatable(dat3)
head(dat3)
## OS_STATUS OS_MONTHS ACHE ACHE_1
## TCGA.CN.4723.01 0:LIVING 55.856922 30.7574 30.7574
## TCGA.BA.A6DE.01 0:LIVING 14.465595 104.7290 104.7290
## TCGA.C9.A480.01 0:LIVING 12.690272 88.2463 88.2463
## TCGA.CV.6441.01 1:DECEASED 9.599895 557.2100 557.2100
## TCGA.HD.8224.01 1:DECEASED 14.662853 743.6500 743.6500
## TCGA.CV.7411.01 1:DECEASED 89.325048 7.9702 7.9702
library(ggpubr)
p <- ggboxplot(dat3, x="OS_STATUS", y="ACHE", color = "OS_STATUS",
palette = "jco", add = "jitter")
p+stat_compare_means(method = "t.test")
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## Warning: Removed 8 rows containing non-finite values (stat_compare_means).
## Warning: Removed 8 rows containing missing values (geom_point).

dat3=dat3[!is.na(dat3$ACHE),]
dat3$ACHE = ifelse(dat3$ACHE > median(dat3$ACHE),'high','low')
attach(dat3)
## The following objects are masked from dat:
##
## ACHE, OS_MONTHS, OS_STATUS
table(dat3$ACHE)
##
## high low
## 257 257
DT::datatable(dat3)
library(survival)
my.surv <- Surv(dat3$OS_MONTHS,dat3$OS_STATUS=='1:DECEASED')
kmfit1 <- survfit(my.surv~ACHE,data = dat3)
getwd()
## [1] "C:/Users/liyix/OneDrive/Desktop"
ggsurvplot(kmfit1,conf.int =F, pval = T,risk.table =T, ncensor.plot = TRUE)
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.

dir_path <- getwd()
ggsurvplot(kmfit1,conf.int =F, pval = T,risk.table =F, ncensor.plot = F)

ggsave(filename = paste0(Sys.Date(),"-","surv.tif"),
plot = last_plot(), device = "tiff", path = dir_path,
width = 10, height = 12, units = "cm",
dpi = 300, limitsize = TRUE,compression = "lzw")
#########################
#ref https://www.jianshu.com/p/fd5e06ec260b
#