Load the survival analysis packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(survival)
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
Load the packages used to extract ovarian cancer information from TCGA
library(RTCGA)
## Welcome to the RTCGA (version: 1.16.0).
library(RTCGA.clinical)
# Build the per-patient survival table for the ovarian cancer (OV) cohort.
# Besides the follow-up time, barcode and vital status that survivalTCGA()
# returns, carry along the four clinical covariates analysed below.
clinical_cols <- c(
  "patient.age_at_initial_pathologic_diagnosis",
  "patient.race",
  "patient.tumor_tissue_site",
  "patient.lymphatic_invasion"
)
clin <- survivalTCGA(OV.clinical, extract.cols = clinical_cols)
## Warning: `mutate_()` was deprecated in dplyr 0.7.0.
## Please use `mutate()` instead.
## See vignette('programming') for more help
# Preview the first six rows to confirm which columns were extracted.
head(clin, n = 6L)
## times bcr_patient_barcode patient.vital_status
## 12.81.0 1224 TCGA-04-1331 1
## 12.81.0.1 1247 TCGA-04-1332 1
## 12.81.0.2 55 TCGA-04-1335 1
## 12.81.0.3 1495 TCGA-04-1336 0
## 12.81.0.4 61 TCGA-04-1337 1
## 12.81.0.5 1418 TCGA-04-1338 0
## patient.age_at_initial_pathologic_diagnosis patient.race
## 12.81.0 78 white
## 12.81.0.1 70 white
## 12.81.0.2 60 black or african american
## 12.81.0.3 55 white
## 12.81.0.4 78 white
## 12.81.0.5 78 white
## patient.tumor_tissue_site patient.lymphatic_invasion
## 12.81.0 ovary yes
## 12.81.0.1 ovary no
## 12.81.0.2 ovary no
## 12.81.0.3 ovary no
## 12.81.0.4 ovary no
## 12.81.0.5 ovary <NA>
# Bar charts summarising each extracted clinical variable.

# Race: tabulate once, keeping missing values as their own category, and
# label that category with the literal text "NA".
race_counts <- table(clin$patient.race, useNA = "always")
race_labels <- names(race_counts)
race_labels[is.na(race_labels)] <- "NA"

# Wide left margin so the long race labels fit beside the horizontal bars.
par(mar = c(4, 18, 2, 2))
barplot(
  race_counts,
  col = c("green", "light green", "pink", "cyan", "blue"),
  horiz = TRUE,
  las = 2,
  names.arg = race_labels,
  main = "Number of OV patients for each race"
)

# Back to a regular margin for the remaining vertical bar charts.
par(mar = c(4, 4, 4, 2))

# Vital status: table() sorts the codes, so the first label goes to the
# smaller code. `names.arg` is spelled out in full instead of relying on
# partial argument matching of `names`.
barplot(
  table(clin$patient.vital_status),
  names.arg = c("Alive", "Death"),
  main = "Number of patients for Alive/Death"
)

# Tumor tissue site.
barplot(
  table(clin$patient.tumor_tissue_site),
  main = "Number of patients for each tumor sites"
)

# Lymphatic invasion, with missing values shown as a third bar.
barplot(
  table(clin$patient.lymphatic_invasion, useNA = "always"),
  names.arg = c("NO", "YES", "NA"),
  main = "Number of patients with lymphatic invasion or not"
)

# Age at diagnosis: one bar per distinct recorded age.
barplot(
  table(clin$patient.age_at_initial_pathologic_diagnosis),
  main = "Age distribution"
)
Observations: 1. The majority of patients are white/Caucasian. 2. The numbers of living and deceased patients are roughly balanced. 3. The predominant tumor site is the ovary.
Perform survival analysis and check the association of age_at_initial_pathologic_diagnosis, race, tumor_tissue_site and lymphatic_invasion with survival
Survival analysis on lymphatic_invasion
# Univariate Cox proportional-hazards model: survival versus lymphatic
# invasion status.
surv_lym <- coxph(
  formula = Surv(times, patient.vital_status) ~ patient.lymphatic_invasion,
  data = clin
)
Survival analysis on age_at_initial_pathologic_diagnosis
# Convert age at diagnosis to numeric so the model treats it as a continuous
# covariate.
# NOTE(review): as.numeric() assumes the column is character or numeric; if it
# were a factor this would return level codes instead of ages -- confirm.
clin[["patient.age_at_initial_pathologic_diagnosis"]] <- as.numeric(
  clin[["patient.age_at_initial_pathologic_diagnosis"]]
)

# Univariate Cox proportional-hazards model: survival versus age at diagnosis.
surv_dt <- coxph(
  formula = Surv(times, patient.vital_status) ~
    patient.age_at_initial_pathologic_diagnosis,
  data = clin
)
Survival analysis on race
# Recode race as numeric codes, numbered in order of first appearance.
# The codes are derived from the data instead of a hard-coded 1..6, so the
# lookup stays valid whatever the number of distinct values is.
race_levels <- unique(clin$patient.race)
race <- as.numeric(seq_along(race_levels))
names(race) <- race_levels

# Look each patient's race up by name; a missing race never matches a name and
# therefore stays NA.
clin$patient.race <- race[clin$patient.race]

# Univariate Cox proportional-hazards model: survival versus the race code.
# NOTE(review): race enters the model as a single numeric trend over arbitrary
# codes. Modelling it as a factor would be more appropriate for a nominal
# variable, but the later p-value extraction assumes one coefficient per
# model, so both would have to change together.
surv_ra <- coxph(
  formula = Surv(times, patient.vital_status) ~ patient.race,
  data = clin
)
Survival analysis on tumor_tissue_site
# Recode tumor tissue site as numeric codes, numbered in order of first
# appearance. The codes are derived from the data instead of a hard-coded
# 1..3, so the lookup stays valid whatever the number of distinct sites is.
site_levels <- unique(clin$patient.tumor_tissue_site)
ta <- as.numeric(seq_along(site_levels))
names(ta) <- site_levels

# Look each patient's site up by name; a missing site never matches a name and
# therefore stays NA.
clin$patient.tumor_tissue_site <- ta[clin$patient.tumor_tissue_site]

# Univariate Cox proportional-hazards model: survival versus the site code.
# NOTE(review): the site enters the model as a single numeric trend over
# arbitrary codes. Modelling it as a factor would be more appropriate for a
# nominal variable, but the later p-value extraction assumes one coefficient
# per model, so both would have to change together.
surv_ta <- coxph(
  formula = Surv(times, patient.vital_status) ~ patient.tumor_tissue_site,
  data = clin
)
Compare the contributions to survival rate across different metrics
# Compare the four univariate Cox models by the p-value of their coefficient.
# The p-value is read from the named "Pr(>|z|)" column of the first row of
# the summary's coefficient matrix, rather than by partial `$` matching and a
# bare positional index.
cox_fits <- list(surv_dt, surv_lym, surv_ra, surv_ta)
p_values <- vapply(
  cox_fits,
  function(fit) summary(fit)$coefficients[1, "Pr(>|z|)"],
  numeric(1)
)

# Wide left margin so the long covariate names fit beside the horizontal bars.
par(mar = c(3, 14, 2, 2))
barplot(
  p_values,
  horiz = TRUE,
  names.arg = c(
    "age_at_initial_pathologic_diagnosis",
    "lymphatic_invasion",
    "race",
    "tumor_tissue_site"
  ),
  las = 2,
  main = "p-value of associations between survival rate and different clinical metrics"
)
# Kaplan-Meier curves by age group. Work on a copy so the age column in `clin`
# is left as it is.
surplotdata <- clin

# Age cut-offs: at or above highAge is "High", at or below lowAge is "Low",
# anything in between is "Mid".
highAge <- 65
lowAge <- 50

age_col <- "patient.age_at_initial_pathologic_diagnosis"
age_years <- surplotdata[[age_col]]

# Start from NA so patients without an age stay unclassified, then fill the
# groups. "High" is assigned last so it takes precedence, as in a
# High / Low / Mid cascade.
age_band <- rep(NA_character_, length(age_years))
age_band[!is.na(age_years)] <- "Mid"
age_band[which(age_years <= lowAge)] <- "Low"
age_band[which(age_years >= highAge)] <- "High"
surplotdata[[age_col]] <- age_band

# Fit one survival curve per age group, then plot the curves with a risk
# table and the p-value shown on the plot.
age_fit <- survfit(
  Surv(times, patient.vital_status) ~
    patient.age_at_initial_pathologic_diagnosis,
  data = surplotdata
)
ggsurvplot(
  age_fit,
  data = surplotdata,
  risk.table = TRUE,
  pval = TRUE,
  break.time.by = 500,
  ggtheme = theme_minimal(),
  risk.table.y.text.col = TRUE,
  risk.table.y.text = FALSE
)
We can draw the following conclusions.
1. Age_at_initial_pathologic_diagnosis has the most significant association with survival in ovarian cancer. This indicates that early detection of the cancer is the key factor in curing the patient or extending the patient’s life in ovarian cancer.
2. Lymphatic invasion has a medium association with ovarian cancer survival, which is in accordance with recent academic publications on effective immune therapy for cancer patients.
3. Patient race has the second-highest association with patient survival. Because the data are not balanced across races, this observation could be biased.