###############################################################################
#The molecular prognostic score, a classifier for risk straification of HGSOC#
#Sarkar et al, E-mail: siddik.sarkar@iicbres.in
###############################################################################
#TCGA_GEO Ovarian cancer database
library(survival)
library(survminer)
## Loading required package: ggplot2
## Warning in register(): Can't find generic `scale_type` in package ggplot2 to
## register S3 method.
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(limma)
library(ggpubr)
library(AnnotationDbi)
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.1.1
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
library(org.Hs.eg.db)
##
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
##
## expand
## Loaded glmnet 4.1-3
library(ROCR)
library(survivalROC)
library(pheatmap)
#####################################################
#Collect RDS that are already saved in other folders#
#####################################################
GSE_26712 <- readRDS("~/Desktop/IICB/Glmnet_SurvivalGenes/SE_26712.rds")
GSE_18520 <- readRDS("~/Desktop/IICB/Glmnet_SurvivalGenes/SE_GSE18520.rds")
TCGA.OV <- readRDS("~/Desktop/IICB/Glmnet_SurvivalGenes/TCGA.OV.rds")
GSE_26193<- readRDS("~/Desktop/IICB/Glmnet_SurvivalGenes/GSE26193.rds")
GSE_14764<- readRDS("~/Desktop/IICB/Glmnet_SurvivalGenes/GSE14764.rds")
GSE_63885<- readRDS("~/Desktop/IICB/Glmnet_SurvivalGenes/GSE63885.rds")
#Remove duplicate gene_ids
o<- order(rowSums(TCGA.OV@assays@data$normalized), decreasing = T)
TCGA.OV<- TCGA.OV[o,]
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Warning: package 'MatrixGenerics' was built under R version 4.1.1
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
##
## rowMedians
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.1.1
head(assay(TCGA.OV))[, 1:10]
## TCGA-29-1692-01B-01R-0808-01 TCGA-13-1488-01A-01R-0477-01
## EEF1A1 12.88355 12.62906
## RPS3A 13.00679 12.15549
## RPS3A 13.00679 12.15549
## RPS4X 12.52005 12.02120
## RPS27 12.71712 12.04733
## RPL39 12.88478 12.13985
## TCGA-13-1483-01A-01R-0477-01 TCGA-24-2024-01A-02R-0668-01
## EEF1A1 12.74414 13.07086
## RPS3A 12.83631 13.13192
## RPS3A 12.83631 13.13192
## RPS4X 12.98435 12.92062
## RPS27 12.66582 12.92439
## RPL39 12.80318 12.75421
## TCGA-13-0908-01B-01R-0453-01 TCGA-04-1341-01A-01R-0434-01
## EEF1A1 12.33280 12.43170
## RPS3A 12.44088 12.67937
## RPS3A 12.44088 12.67937
## RPS4X 12.15694 12.53605
## RPS27 12.22548 12.79412
## RPL39 12.45119 12.63017
## TCGA-13-1489-02A-01R-0808-01 TCGA-36-1575-01A-01R-0538-01
## EEF1A1 13.02541 12.65636
## RPS3A 13.02833 12.51563
## RPS3A 13.02833 12.51563
## RPS4X 12.86771 12.24898
## RPS27 13.03593 12.08975
## RPL39 13.19170 12.06427
## TCGA-13-0803-01A-01R-0362-01 TCGA-25-2408-01A-01R-0709-01
## EEF1A1 12.61793 12.89962
## RPS3A 12.23207 12.77040
## RPS3A 12.23207 12.77040
## RPS4X 12.48291 12.94298
## RPS27 12.25968 12.78388
## RPL39 12.87438 12.70745
d<- duplicated(rownames(TCGA.OV))
table(d)
## d
## FALSE TRUE
## 10878 702
TCGA.OV<- TCGA.OV[!d,]
###############################
#Similarly for GSE datasets#
###############################
o<- order(rowSums(assay(GSE_26712)), decreasing = T)
GSE_26712<- GSE_26712[o,]
head(assay(GSE_26712))[, 1:10]
## GSM657519_HOSE2237 GSM657520_HOSE2008 GSM657521_HOSE2061
## EEF1A1 13.47900 13.47900 13.11366
## TPT1 13.29893 13.32458 12.68414
## RPL37A 13.00495 12.96124 12.45180
## RPS18 12.95925 12.58468 12.36975
## RPL41 13.17338 13.16965 12.76300
## RPL13A 12.34042 12.05152 11.85875
## GSM657522_HOSE2064 GSM657523_HOSE2085 GSM657524_HOSE2225
## EEF1A1 13.22784 12.90234 13.33805
## TPT1 12.89589 12.68356 13.35606
## RPL37A 12.71304 12.45182 12.93762
## RPS18 12.28234 11.85091 12.91837
## RPL41 13.10460 12.59513 13.20533
## RPL13A 11.77486 11.59822 12.12697
## GSM657525_HOSE2226 GSM657526_HOSE2228 GSM657527_HOSE2230
## EEF1A1 13.41927 13.38104 13.27282
## TPT1 13.27536 13.31731 13.25633
## RPL37A 13.01197 12.94455 12.77350
## RPS18 12.85196 12.78197 12.82530
## RPL41 13.13413 13.19052 13.16812
## RPL13A 12.06184 12.20157 12.11931
## GSM657528_HOSE2234
## EEF1A1 13.48788
## TPT1 13.56379
## RPL37A 12.92421
## RPS18 13.03841
## RPL41 13.12579
## RPL13A 12.38129
d<- duplicated(rownames(GSE_26712))
table(d)
## d
## FALSE
## 12645
GSE_26712<- GSE_26712[!d,]
o<- order(rowSums(assay(GSE_18520)), decreasing = T)
GSE_18520<- GSE_18520[o,]
head(assay(GSE_18520))[, 1:10]
## GSM461348.CEL.gz GSM461349.CEL.gz GSM461350.CEL.gz GSM461351.CEL.gz
## RPL41 14.27943 14.21090 13.90982 14.38014
## EEF1A1 14.22060 13.68581 13.72749 14.34786
## RPL37A 13.98879 13.78062 13.50181 14.08039
## ND4 14.13128 13.68930 13.36868 14.16025
## TPT1 14.12233 13.59100 13.18768 14.05185
## RPS20 13.84227 13.86572 13.32865 13.99806
## GSM461352.CEL.gz GSM461353.CEL.gz GSM461354.CEL.gz GSM461355.CEL.gz
## RPL41 14.40031 14.37727 14.18398 14.12652
## EEF1A1 13.67195 14.27973 14.26522 13.92270
## RPL37A 13.71422 13.98134 14.14058 13.69619
## ND4 13.83797 13.51943 14.49082 13.54138
## TPT1 13.67562 13.65820 13.95461 13.54869
## RPS20 13.63017 13.69952 13.77505 13.76030
## GSM461356.CEL.gz GSM461357.CEL.gz
## RPL41 13.75686 14.72293
## EEF1A1 13.56270 14.59740
## RPL37A 13.39989 14.44599
## ND4 13.36689 14.31416
## TPT1 13.54526 14.27828
## RPS20 13.18255 14.06773
d<- duplicated(rownames(GSE_18520))
table(d)
## d
## FALSE
## 20864
GSE_18520<- GSE_18520[!d,]
o<- order(rowSums(exprs(GSE_26193)), decreasing = T)
GSE_26193<- GSE_26193[o,]
head(exprs(GSE_26193))[, 1:10]
## GSM642933_OC01_ARN0001_s1h1s1_U133p2.CEL.gz
## ND4 14.18139
## COX1 13.58479
## RPL41 14.16913
## ATP6 14.09860
## EEF1A1 14.12426
## RPL37A 13.95109
## GSM642935_OC01_ARN0004_s1h1s1_U133p2.CEL.gz
## ND4 14.13805
## COX1 13.85712
## RPL41 14.24466
## ATP6 13.90537
## EEF1A1 14.01791
## RPL37A 14.08491
## GSM642936_OC01_ARN0005_s1h1s1_U133p2.CEL.gz
## ND4 14.21738
## COX1 13.85340
## RPL41 14.10495
## ATP6 14.01890
## EEF1A1 14.09997
## RPL37A 13.95778
## GSM642937_OC01_ARN0007_s1h1s1_U133p2.CEL.gz
## ND4 14.34638
## COX1 14.19546
## RPL41 14.05138
## ATP6 14.20252
## EEF1A1 13.94882
## RPL37A 13.95467
## GSM642938_OC01_ARN0008_s1h1s1_U133p2.CEL.gz
## ND4 13.73810
## COX1 14.03803
## RPL41 14.40365
## ATP6 13.81328
## EEF1A1 14.13210
## RPL37A 14.03432
## GSM642939_OC01_ARN0009_s1h1s1_U133p2.CEL.gz
## ND4 14.44950
## COX1 14.08644
## RPL41 14.08412
## ATP6 14.41309
## EEF1A1 13.87356
## RPL37A 14.04848
## GSM642940_OC01_ARN0011_s1h1s1_U133p2.CEL.gz
## ND4 14.29389
## COX1 13.92715
## RPL41 14.08640
## ATP6 14.26453
## EEF1A1 14.15267
## RPL37A 14.01831
## GSM642941_OC01_ARN0012_s1h1s1_U133p2.CEL.gz
## ND4 13.97111
## COX1 13.57598
## RPL41 14.22355
## ATP6 13.88233
## EEF1A1 14.13613
## RPL37A 13.92573
## GSM642943_OC01_ARN0014_s1h1s1_U133p2.CEL.gz
## ND4 14.29530
## COX1 13.96928
## RPL41 14.00965
## ATP6 14.18433
## EEF1A1 13.84907
## RPL37A 13.83259
## GSM642944_OC01_ARN0015_s1h1s1_U133p2.CEL.gz
## ND4 14.15642
## COX1 13.92579
## RPL41 14.13404
## ATP6 14.14130
## EEF1A1 13.88436
## RPL37A 14.02862
d<- duplicated(rownames(GSE_26193))
table(d)
## d
## FALSE
## 20864
GSE_26193<- GSE_26193[!d,]
o<- order(rowSums(exprs(GSE_14764)), decreasing = T)
GSE_14764<- GSE_14764[o,]
head(exprs(GSE_14764))[, 1:10]
## GSM368661.cel.gz GSM368662.cel.gz GSM368664.cel.gz GSM368665.cel.gz
## RPL41 13.36209 13.37945 13.68061 13.66169
## EEF1A1 13.05101 13.10914 13.45291 13.09676
## RPL23A 13.21419 13.29762 13.22367 13.25454
## RPL37A 13.25228 12.70039 13.12462 12.87569
## RPL13A 13.16902 12.88275 13.11861 13.03930
## RPS18 12.82202 12.88801 13.42103 12.85953
## GSM368666.cel.gz GSM368667.cel.gz GSM368668.cel.gz GSM368670.cel.gz
## RPL41 13.64596 13.61178 13.55445 13.62603
## EEF1A1 13.64948 13.44657 13.26581 13.48810
## RPL23A 13.50547 13.62354 13.54157 13.41085
## RPL37A 13.23872 12.50211 12.49782 13.12422
## RPL13A 13.21223 12.65268 12.64409 12.76070
## RPS18 13.24162 12.95164 12.65370 12.96781
## GSM368671.cel.gz GSM368673.cel.gz
## RPL41 13.75162 13.64442
## EEF1A1 13.65655 13.59476
## RPL23A 13.52068 13.67517
## RPL37A 13.14057 13.17259
## RPL13A 13.38848 13.38097
## RPS18 13.33783 13.12293
d<- duplicated(rownames(GSE_14764))
table(d)
## d
## FALSE
## 12645
GSE_14764<- GSE_14764[!d,]
o<- order(rowSums(exprs(GSE_63885)), decreasing = T)
GSE_63885<- GSE_63885[o,]
head(exprs(GSE_63885))[, 1:10]
## GSM1559299_P8.CEL.gz GSM1559300_P264.CEL.gz GSM1559301_T348.CEL.gz
## RPL41 14.14668 14.29164 14.18414
## RPL37A 14.16374 14.06431 14.12732
## RPS18 14.05659 13.84077 13.84848
## EEF1A1 13.99776 14.02816 14.15655
## ND4 14.07159 14.06878 14.15734
## RPL13A 14.14679 13.26606 13.66358
## GSM1559302_P67.CEL.gz GSM1559303_T175.CEL.gz GSM1559304_T360.CEL.gz
## RPL41 14.29392 14.26875 14.19185
## RPL37A 14.27454 14.21328 14.14387
## RPS18 14.03103 14.20205 14.11986
## EEF1A1 14.05293 14.13090 14.15776
## ND4 14.01997 14.04059 13.75544
## RPL13A 13.99426 13.90925 13.99475
## GSM1559305_T382.CEL.gz GSM1559306_P343.CEL.gz GSM1559307_T174.CEL.gz
## RPL41 14.15513 14.05765 14.32762
## RPL37A 14.13234 14.05478 14.26893
## RPS18 14.05817 14.21275 14.12396
## EEF1A1 13.96471 14.15797 14.10708
## ND4 14.06721 14.08082 13.88717
## RPL13A 14.04631 13.99562 13.94295
## GSM1559308_T398.CEL.gz
## RPL41 14.25283
## RPL37A 14.25449
## RPS18 14.15875
## EEF1A1 14.11421
## ND4 14.09875
## RPL13A 14.07334
d<- duplicated(rownames(GSE_63885))
table(d)
## d
## FALSE
## 20864
GSE_63885<- GSE_63885[!d,]
####################################
#Common genes in all datasets#
####################################
idfound<- rownames(TCGA.OV)%in% rownames(GSE_18520)
table(idfound)
## idfound
## FALSE TRUE
## 593 10285
TCGA.OV<- TCGA.OV[idfound,]
idfound<- rownames(TCGA.OV)%in% rownames(GSE_26712)
table(idfound)
## idfound
## FALSE TRUE
## 60 10225
TCGA.OV<- TCGA.OV[idfound,]
idfound<- rownames(TCGA.OV)%in% rownames(GSE_26193)
table(idfound)
## idfound
## TRUE
## 10225
TCGA.OV<- TCGA.OV[idfound,]
idfound<- rownames(TCGA.OV)%in% rownames(GSE_63885)
table(idfound)
## idfound
## TRUE
## 10225
TCGA.OV<- TCGA.OV[idfound,]
idfound<- rownames(TCGA.OV)%in% rownames(GSE_14764)
table(idfound)
## idfound
## TRUE
## 10225
TCGA.OV<- TCGA.OV[idfound,]
###########################################
#Arrange genes based on order of TCGA.OV#
###########################################
GSE_18520<- GSE_18520[rownames(TCGA.OV),]
GSE_26712<- GSE_26712[rownames(TCGA.OV),]
GSE_26193<- GSE_26193[rownames(TCGA.OV),]
GSE_63885<- GSE_63885[rownames(TCGA.OV),]
GSE_14764<- GSE_14764[rownames(TCGA.OV),]
##########################################
# Pool the datas#
##########################################
Samples<- cbind.data.frame(TCGA= ncol(TCGA.OV),GSE_18520=ncol(GSE_18520),
GSE_26712=ncol(GSE_26712),GSE_26193=ncol(GSE_26193),
GSE_63885=ncol(GSE_63885), GSE_14764= ncol(GSE_14764))
Samples$Total<- rowSums(Samples)
head(Samples)# 1022 Samples
## TCGA GSE_18520 GSE_26712 GSE_26193 GSE_63885 GSE_14764 Total
## Samples 544 63 195 79 73 68 1022
y.common<- SummarizedExperiment(assays=list(
counts= cbind(assay(TCGA.OV, "Signal"), assay(GSE_18520, "log2_RMA"),
assay(GSE_26712, "log2_RMA"), exprs(GSE_26193), exprs(GSE_63885), exprs(GSE_14764)
)),
rowData = rowData(TCGA.OV)
)
dim(y.common)
## [1] 10225 1022
TCGA.OV$description<- factor(TCGA.OV$primary_diagnosis)
TCGA.OV$shortLetterCode<- factor(TCGA.OV$shortLetterCode) # NT;NormalTissue, TP; TumorPrimary, TR; TumorRecurrent
GSE_18520$shortLetterCode <- GSE_18520$description
levels(GSE_18520$shortLetterCode)[2]<- "TP" # PT changed to TP for consistency with TCGA abbreviations
GSE_18520$description<- GSE_18520$characteristics_ch1
GSE_26712$shortLetterCode<- factor(GSE_26712$source_name_ch1)
levels(GSE_26712$shortLetterCode)<- c("NT", "TP")
GSE_26193$shortLetterCode<- factor(GSE_26193$type)
levels(GSE_26193$shortLetterCode)<- "TP"
GSE_26193$description<- factor(GSE_26193$type)
levels(GSE_26193$description)<- "Serous"
levels(GSE_26193$vital_status)<- c("Alive", "Dead")
GSE_26193$time <- GSE_26193$time.in.days
colData(y.common)<- rbind(colData(TCGA.OV) [,c("shortLetterCode", "description", "time", "vital_status")],
colData(GSE_18520)[, c("shortLetterCode","description", "time", "vital_status")],
colData(GSE_26712)[, c("shortLetterCode","description", "time", "vital_status")],
pData(GSE_26193) [, c("shortLetterCode","description", "time", "vital_status")],
pData(GSE_63885) [, c("shortLetterCode","description", "time", "vital_status")],
pData(GSE_14764) [, c("shortLetterCode","description", "time", "vital_status")]
)
table(y.common$shortLetterCode)
##
## NT TP TR
## 28 979 15
y.common$batch<- factor(c(rep("TCGA",times=ncol(TCGA.OV)),
rep("GSE18520",times=ncol(GSE_18520)),
rep("GSE26712", times=ncol(GSE_26712)),
rep("GSE26193", times=ncol(GSE_26193)),
rep("GSE63855", times=ncol(GSE_63885)),
rep("GSE14764", times=ncol(GSE_14764)))
)
y.common$batch<- relevel(y.common$batch, ref= "TCGA")
rowData(y.common)$Symbol<-rowData(y.common)$gene_id
dim(y.common) #10255 genes and 1022 samples
## [1] 10225 1022
assay(y.common, "normalized")<- normalizeBetweenArrays(assay(y.common, "counts")) # normalize array
summary(colSums(assay(y.common, "counts")))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 51062 54495 56356 61324 65567 89481
summary(colSums(assay(y.common, "normalized")))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 61324 61324 61324 61324 61324 61324
##################################################
###Remove batch effect#
##################################################
assay(y.common, "normalized_batch")<- removeBatchEffect(assay(y.common, "normalized"), batch= y.common$batch)
my.colors= c("black", "blue","green" ,"yellow", "orange", "red")
points<- c(1:3)
corr_tcga_gse<- cor(assay(y.common, "normalized"))
y.common$correlation<- corr_tcga_gse[,1]
corr_tcga_gse_batch<- cor(assay(y.common, "normalized_batch"))
y.common$correlation_batch<- corr_tcga_gse_batch[,1]
summary(y.common$correlation)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1309 0.7591 0.8727 0.8302 0.9126 1.0000
summary(y.common$correlation_batch)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4003 0.8830 0.8996 0.8939 0.9130 1.0000
par(mfrow=c(1,2))
my.colors<- c("black", "blue", "green", "yellow", "orange", "red")
points<- c(1:3)
set.seed(123)
plotMDS(assay(y.common, "normalized"), col= my.colors[y.common$batch], pch= points[y.common$shortLetterCode],
main= "MDSplot_TCGA_GSE_normalized")
legend("topright", legend= levels(y.common$batch), col=my.colors, pch=1, cex=0.75)
legend("topleft", legend= levels(y.common$shortLetterCode), col= "black", pch= points, cex=0.75)
table(y.common$shortLetterCode)
##
## NT TP TR
## 28 979 15
set.seed(123)
plotMDS(assay(y.common, "normalized_batch"), col= my.colors[y.common$batch], pch= points[y.common$shortLetterCode],
main= "MDSplot_TCGA_GSE_normalized_batch")

###############################
# corelation cut-off 0.7 #
##############################
y.common_cor<- y.common[,(y.common$correlation>=0.7)] # keep samples with min 70% similarity
table(y.common_cor$shortLetterCode) #24 PT, 901 TP, 15 TR
##
## NT TP TR
## 24 901 15
y.common_cor_batch<- y.common[,(y.common$correlation_batch>=0.7)]
table(y.common_cor_batch$shortLetterCode) #28 PT, 973 TP, 15 TR
##
## NT TP TR
## 28 973 15
y.tcga.gse_cor<- assay(y.common_cor, "normalized")
dim(y.tcga.gse_cor) #10225 genes and 940 samples
## [1] 10225 940
y.tcga.gse_cor_batch<- assay(y.common_cor_batch, "normalized_batch")
dim(y.tcga.gse_cor_batch)#10255 genes and 1016 samples
## [1] 10225 1016
#####################
#design-matrix#
#####################
design<- model.matrix(~y.common_cor$batch+y.common_cor$shortLetterCode, data= colData(y.common_cor))
colnames(design)
## [1] "(Intercept)" "y.common_cor$batchGSE14764"
## [3] "y.common_cor$batchGSE18520" "y.common_cor$batchGSE26193"
## [5] "y.common_cor$batchGSE26712" "y.common_cor$batchGSE63855"
## [7] "y.common_cor$shortLetterCodeTP" "y.common_cor$shortLetterCodeTR"
#design_batch
design_batch<- model.matrix(~y.common_cor_batch$batch+y.common_cor_batch$shortLetterCode, data= colData(y.common_cor_batch))
colnames(design_batch)
## [1] "(Intercept)"
## [2] "y.common_cor_batch$batchGSE14764"
## [3] "y.common_cor_batch$batchGSE18520"
## [4] "y.common_cor_batch$batchGSE26193"
## [5] "y.common_cor_batch$batchGSE26712"
## [6] "y.common_cor_batch$batchGSE63855"
## [7] "y.common_cor_batch$shortLetterCodeTP"
## [8] "y.common_cor_batch$shortLetterCodeTR"
fit<- lmFit(y.tcga.gse_cor, design)
fit.treat<- treat(fit,fc=1.5, trend=T, robust = T)
summary(decideTests(fit.treat))
## (Intercept) y.common_cor$batchGSE14764 y.common_cor$batchGSE18520
## Down 0 2157 2050
## NotSig 0 5825 6248
## Up 10225 2243 1927
## y.common_cor$batchGSE26193 y.common_cor$batchGSE26712
## Down 1500 2361
## NotSig 7265 5488
## Up 1460 2376
## y.common_cor$batchGSE63855 y.common_cor$shortLetterCodeTP
## Down 1577 604
## NotSig 7079 9214
## Up 1569 407
## y.common_cor$shortLetterCodeTR
## Down 387
## NotSig 9583
## Up 255
topTreat(fit.treat, coef=7)
## logFC AveExpr t P.Value adj.P.Val
## PDE8B -2.958343 4.657358 -24.83294 2.713855e-105 2.774917e-101
## ABCA8 -4.959095 4.353356 -20.10216 2.728834e-75 1.395116e-71
## FRY -2.716927 4.718841 -19.67601 1.045609e-72 3.374259e-69
## BCHE -4.546883 3.850600 -19.66099 1.320004e-72 3.374259e-69
## KDR -2.746455 4.885296 -19.27283 2.836310e-70 5.800253e-67
## ALDH1A2 -4.634721 5.037018 -19.16503 1.285731e-69 2.191100e-66
## NELL2 -4.269591 5.491429 -18.21096 5.793290e-64 8.462341e-61
## PRG4 -2.322605 3.690822 -18.10170 2.514308e-63 3.213600e-60
## PROCR -2.725052 5.083648 -16.61484 7.952715e-55 9.035168e-52
## REEP1 -3.216113 5.202065 -16.37232 1.796412e-53 1.836831e-50
#fit_batch
fit_batch<- lmFit(y.tcga.gse_cor_batch, design_batch)
fit.treat_batch<- treat(fit_batch,fc=1.5, trend=T, robust = T)
summary(decideTests(fit.treat_batch))
## (Intercept) y.common_cor_batch$batchGSE14764
## Down 0 0
## NotSig 0 10225
## Up 10225 0
## y.common_cor_batch$batchGSE18520 y.common_cor_batch$batchGSE26193
## Down 0 0
## NotSig 10225 10225
## Up 0 0
## y.common_cor_batch$batchGSE26712 y.common_cor_batch$batchGSE63855
## Down 0 0
## NotSig 10225 10225
## Up 0 0
## y.common_cor_batch$shortLetterCodeTP
## Down 649
## NotSig 9163
## Up 413
## y.common_cor_batch$shortLetterCodeTR
## Down 399
## NotSig 9592
## Up 234
topTreat(fit.treat_batch, coef=7)
## logFC AveExpr t P.Value adj.P.Val
## PDE8B -2.995180 4.539606 -26.02577 4.726398e-115 4.832742e-111
## KDR -2.928108 4.789632 -22.48891 1.812459e-91 9.266199e-88
## PRG4 -2.588567 3.523736 -22.43230 4.377221e-91 1.491903e-87
## ABCA8 -5.050961 4.312954 -21.06813 2.773090e-82 7.088711e-79
## BCHE -4.509686 3.757108 -20.97090 1.153730e-81 2.359377e-78
## FRY -2.727236 4.659661 -20.58400 3.165120e-79 5.393891e-76
## ALDH1A2 -4.665667 5.283042 -20.45095 2.258316e-78 3.298755e-75
## NELL2 -4.198817 5.507578 -19.64696 2.357152e-73 3.012735e-70
## PROCR -2.820136 4.489355 -18.95678 4.060233e-69 4.612876e-66
## CLEC4M -2.276993 4.306947 -18.40005 9.381371e-66 9.592451e-63
VO<- cbind.data.frame(decideTests(fit.treat)[,7],decideTests(fit.treat_batch)[,7] )
colnames(VO)<- c("Norm", "batch_Norm")
vennDiagram(VO, include= c("up", "down"), circle.col = c("blue", "red"), counts.col = c("red", "blue"))
par(mfrow=c(1,2))

plotMD(fit.treat_batch, coef=7, status=decideTests(fit.treat_batch[,7]), main= "MA plot", legend= "bottomright")
abline(h=c(-log2(1.5),log2(1.5)), lty=2)
#Adding no. genes up and down inside the plot
text(4,2.75, summary(decideTests(fit.treat_batch[,7]))[1], col="red")
text(12,-2.75, summary(decideTests(fit.treat_batch[,7]))[3], col="blue")
#Adding gene details
egSymbol<- toTable(org.Hs.egSYMBOL)
m<- match(rownames(fit.treat_batch), egSymbol$symbol)
genedetails<- data.frame( genesymbol=rownames(fit.treat_batch), gene_id= egSymbol$gene_id[m],
status_PTvsNT=factor(decideTests(fit.treat_batch[,7])) )
fit.treat_batch[["genedetails"]]<-genedetails
head(fit.treat_batch$genedetails)
## genesymbol gene_id status_PTvsNT
## 1 EEF1A1 1915 0
## 2 RPS3A 6189 0
## 3 RPS4X 6191 0
## 4 RPS27 6232 0
## 5 RPL39 6170 0
## 6 RPS23 6228 0
my.colors_vol<- c("blue", "black", "red")
my.points_vol<- c(1,0.25,1)
volcanoplot(fit.treat_batch, coef=7, names= rownames(fit.treat_batch), highlight = 10,
col=my.colors_vol[fit.treat_batch$gene$status], hl.col = "black",
cex=my.points_vol[fit.treat_batch$gene$status_PTvsNT], main= "Volcanoplot" )
legend=legend("topright",legend= c("Notsig", "Up", "Down"), col= c("black", "red", "blue"), pch=20, pt.cex= c(0.25,1,1) )
abline(v=c(-log2(1.5), log2(1.5)),lty=2)

##########################################
#MDSplot with SDE genes of TP
#########################################
set.seed(123)
plotMDS(assay(y.common_cor_batch, "normalized_batch")[rownames(topTreat(fit.treat_batch, coef=7, number=30)), ], col= my.colors[y.common_cor_batch$batch], pch= points[y.common_cor_batch$shortLetterCode],
main= "MDSplot_TCGA_GSE_normalized_batch_30")
legend("topright", legend= levels(y.common$batch), col=my.colors, pch=1, cex=0.75)
legend("topleft", legend= levels(y.common_cor_batch$shortLetterCode), col= "black", pch= points, cex=0.75)
par(mfrow=c(2,5))

SDE_PT<-y.tcga.gse_cor_batch[,y.common_cor_batch$shortLetterCode!="TR"]
dim(SDE_PT)
## [1] 10225 1001
Type<- factor(y.common_cor_batch$shortLetterCode[y.common_cor_batch$shortLetterCode!="TR"])
for (i in 1:10) {
plot(SDE_PT[rownames(topTreat(fit.treat_batch,coef=7, n="inf"))[i],]~Type,col= c("black", "blue"),
xlab= "", ylab= "log2.expression", main= rownames(topTreat(fit.treat_batch,coef=7, n="inf"))[i])
}

##################################
#pathway analysis (TUMOR VS CTRL)#
##################################
go<- goana(fit.treat_batch, coef=7,geneid = fit.treat_batch$genedetails$gene_id,species= "Hs" )
topGO(go, ontology = "BP") # MITOTIC CELL,G1/S DNArepair pathways UPREGULATED IN CANCER VS NORMAL
## Term Ont N
## GO:1903047 mitotic cell cycle process BP 611
## GO:0000278 mitotic cell cycle BP 716
## GO:0051301 cell division BP 399
## GO:0022402 cell cycle process BP 913
## GO:0007049 cell cycle BP 1233
## GO:0000280 nuclear division BP 268
## GO:0000070 mitotic sister chromatid segregation BP 113
## GO:0000819 sister chromatid segregation BP 130
## GO:0007059 chromosome segregation BP 201
## GO:0140014 mitotic nuclear division BP 202
## GO:0048285 organelle fission BP 298
## GO:0098813 nuclear chromosome segregation BP 159
## GO:0044772 mitotic cell cycle phase transition BP 417
## GO:0044770 cell cycle phase transition BP 444
## GO:0051276 chromosome organization BP 777
## GO:0051726 regulation of cell cycle BP 822
## GO:0010564 regulation of cell cycle process BP 535
## GO:1902850 microtubule cytoskeleton organization involved in mitosis BP 104
## GO:0007051 spindle organization BP 124
## GO:0071103 DNA conformation change BP 195
## Up Down P.Up P.Down
## GO:1903047 91 34 4.209569e-29 0.8159693
## GO:0000278 99 40 4.306685e-29 0.8272566
## GO:0051301 69 29 7.893316e-26 0.2480253
## GO:0022402 105 47 3.371654e-24 0.9515656
## GO:0007049 124 68 1.546724e-23 0.9118797
## GO:0000280 52 13 6.723441e-22 0.8769150
## GO:0000070 34 2 4.922454e-21 0.9949247
## GO:0000819 36 3 7.465585e-21 0.9907226
## GO:0007059 44 4 8.245322e-21 0.9990917
## GO:0140014 44 9 1.016346e-20 0.9016661
## GO:0048285 53 16 1.742421e-20 0.7919150
## GO:0098813 38 3 1.663878e-19 0.9980519
## GO:0044772 58 19 4.664767e-17 0.9543943
## GO:0044770 59 20 2.137449e-16 0.9633394
## GO:0051276 80 29 2.076509e-15 0.9996679
## GO:0051726 82 41 5.334810e-15 0.9627592
## GO:0010564 62 23 2.420275e-14 0.9854882
## GO:1902850 24 7 2.161967e-12 0.4928457
## GO:0007051 26 6 2.747498e-12 0.8072682
## GO:0071103 32 9 8.970279e-12 0.8786757
#write.csv(topGO(go, ontology = "BP"), file= "topGO.csv")
topGO.PT<- topGO(go, ontology = "BP", truncate.term = 30)
kegg<- kegga(fit.treat_batch, coef=7,geneid = fit.treat_batch$genedetails$gene_id,species= "Hs")
topKEGG(kegg) # cellcycle, dna replication, Diabetic cardiomyopathy, NAFLD upregulated
## Pathway N Up Down P.Up
## path:hsa04110 Cell cycle 104 23 2 1.611804e-11
## path:hsa04610 Complement and coagulation cascades 77 1 16 9.586955e-01
## path:hsa03030 DNA replication 32 8 0 2.962288e-05
## path:hsa05012 Parkinson disease 198 20 10 1.453050e-04
## path:hsa05205 Proteoglycans in cancer 181 4 25 9.384746e-01
## path:hsa00190 Oxidative phosphorylation 88 12 1 2.041266e-04
## path:hsa05143 African trypanosomiasis 31 1 8 7.219724e-01
## path:hsa04512 ECM-receptor interaction 76 10 7 9.099854e-04
## path:hsa04923 Regulation of lipolysis in adipocytes 50 0 10 1.000000e+00
## path:hsa05165 Human papillomavirus infection 285 23 21 1.226189e-03
## path:hsa04510 Focal adhesion 176 10 22 1.748276e-01
## path:hsa01230 Biosynthesis of amino acids 58 8 1 2.155896e-03
## path:hsa05020 Prion disease 211 18 10 2.221340e-03
## path:hsa00830 Retinol metabolism 38 0 8 1.000000e+00
## path:hsa00340 Histidine metabolism 16 0 5 1.000000e+00
## path:hsa04115 p53 signaling pathway 61 8 5 2.981696e-03
## path:hsa00982 Drug metabolism - cytochrome P450 40 1 8 8.084109e-01
## path:hsa04114 Oocyte meiosis 93 10 8 4.197265e-03
## path:hsa05130 Pathogenic Escherichia coli infection 156 14 10 4.230349e-03
## path:hsa00330 Arginine and proline metabolism 34 1 7 7.544169e-01
## P.Down
## path:hsa04110 9.914401e-01
## path:hsa04610 2.069782e-05
## path:hsa03030 1.000000e+00
## path:hsa05012 8.150140e-01
## path:hsa05205 1.872063e-04
## path:hsa00190 9.969609e-01
## path:hsa05143 5.387913e-04
## path:hsa04512 2.063235e-01
## path:hsa04923 1.003968e-03
## path:hsa05165 2.689900e-01
## path:hsa04510 1.720395e-03
## path:hsa01230 9.779489e-01
## path:hsa05020 8.694962e-01
## path:hsa00830 2.253225e-03
## path:hsa00340 2.461951e-03
## path:hsa04115 3.451099e-01
## path:hsa00982 3.167943e-03
## path:hsa04114 2.370325e-01
## path:hsa05130 5.342913e-01
## path:hsa00330 4.794405e-03
#write.csv(topKEGG(kegg), file= "topKEGG.csv")
topkegg.PT<- topKEGG(kegg, truncate.path = 30)
################################################
#Significant differential genes; Tumor vs Normal
################################################
differentailgenes<- rownames(decideTests(fit.treat_batch[,7]))[decideTests(fit.treat_batch[,7])!=0]
length(differentailgenes)
## [1] 1062
y.common_surv<- y.common_cor_batch[c(differentailgenes), ] # 1062 de genes and 1016 samples and
y.common_surv<-y.common_surv[,!is.na(y.common_surv$time)] # 982 samples
y.common_surv<-y.common_surv[,y.common_surv$shortLetterCode!="NT"] # excluding normal samples
y.common_surv$shortLetterCode<- factor(y.common_surv$shortLetterCode)
dim(y.common_surv) #1062 genes and 982 samples
## [1] 1062 982
y.common_surv$vital_status <- factor(y.common_surv$vital_status)
table(y.common_surv$shortLetterCode) #967 primary and 15 recurrent tumor with survival time
##
## TP TR
## 967 15
######################################
#Splitting and merging Pheno and counts data#
#####################################
OV.PhenoData<- data.frame(colData(y.common_surv))
OV.Counts<- assay(y.common_surv, "normalized_batch")
OV.Counts<- OV.Counts[order(rowSums(OV.Counts), decreasing= T),]
head(OV.Counts)[, 1:5]
## TCGA-29-1692-01B-01R-0808-01 TCGA-13-1488-01A-01R-0477-01
## GAPDH 12.58758 12.68406
## TUBA1B 11.80367 11.78490
## TUBA1C 11.75346 11.91617
## RPL18A 11.47641 10.88576
## COX6C 11.08334 11.63995
## WFDC2 10.26358 10.48439
## TCGA-13-1483-01A-01R-0477-01 TCGA-24-2024-01A-02R-0668-01
## GAPDH 12.58758 12.49987
## TUBA1B 11.16848 11.62928
## TUBA1C 11.15961 11.55965
## RPL18A 11.18119 11.55521
## COX6C 11.21686 10.57146
## WFDC2 10.72970 10.58379
## TCGA-13-0908-01B-01R-0453-01
## GAPDH 12.46622
## TUBA1B 11.47308
## TUBA1C 11.57144
## RPL18A 10.83822
## COX6C 12.06827
## WFDC2 10.22433
d<- duplicated(rownames(OV.Counts))
table(d)
## d
## FALSE
## 1062
OV.Counts<- OV.Counts[!d,]
covariates<- rownames(OV.Counts)
OV_TCGA_GSE<-cbind.data.frame(OV.PhenoData,t(OV.Counts))
colnames(OV_TCGA_GSE) # c1 to 7 is PhenoData
## [1] "shortLetterCode" "description" "time"
## [4] "vital_status" "batch" "correlation"
## [7] "correlation_batch" "GAPDH" "TUBA1B"
## [10] "TUBA1C" "RPL18A" "COX6C"
## [13] "WFDC2" "TUBB" "COX6A1"
## [16] "VIM" "ASS1" "SERF2"
## [19] "COL1A1" "C3" "COX5B"
## [22] "TPI1" "LGALS3" "TIMP1"
## [25] "COX7B" "SPINT2" "S100A10"
## [28] "CAPN2" "HDGF" "SPP1"
## [31] "MARCKSL1" "MUC1" "SPON1"
## [34] "TXNIP" "UCP2" "IFI27"
## [37] "PNN" "UQCRQ" "PFN1"
## [40] "SNRPB" "MT1X" "PEA15"
## [43] "SDCBP" "NPTN" "NCOA4"
## [46] "TMED10" "TAGLN2" "BANF1"
## [49] "TACSTD2" "GPI" "PRKCI"
## [52] "PSMB2" "GSTP1" "S100A13"
## [55] "C19orf53" "FOLR1" "WT1"
## [58] "KRT8" "CCT5" "S100A11"
## [61] "DCN" "PUF60" "C1S"
## [64] "NDUFB11" "KPNA2" "IGFBP2"
## [67] "ASAH1" "CYC1" "ST13"
## [70] "CRABP2" "CALD1" "BZW2"
## [73] "ISG15" "PAX8" "JUN"
## [76] "FAM120A" "MT1G" "BTG3"
## [79] "LTA4H" "ANXA5" "SFN"
## [82] "PNRC2" "S100A4" "PRKAR1A"
## [85] "COL4A2" "PRKCSH" "CYFIP1"
## [88] "PTTG1" "FKBP1A" "YWHAE"
## [91] "TPM1" "MTHFD2" "COL4A1"
## [94] "TPD52L1" "RPN1" "SERPING1"
## [97] "ATP6V0B" "MRPL15" "AURKAIP1"
## [100] "HTRA1" "CKS1B" "AHCY"
## [103] "SH3BGRL" "UGP2" "PDGFC"
## [106] "GANAB" "NFE2L2" "CXCR4"
## [109] "SHMT2" "HYOU1" "CIRBP"
## [112] "OAT" "RCN2" "POLR2H"
## [115] "IFI16" "CKS2" "PRMT1"
## [118] "SERPINH1" "VEGFA" "CLDN3"
## [121] "FKBP4" "LSM4" "TCEAL4"
## [124] "TRIM28" "SPARCL1" "CP"
## [127] "SYNGR2" "LCN2" "OGT"
## [130] "CALR" "GJA1" "B4GALT5"
## [133] "MDK" "YPEL5" "SOX17"
## [136] "SLC34A2" "SLC39A6" "JUP"
## [139] "ALDH2" "UBE2C" "CTSA"
## [142] "PAIP1" "ALDH9A1" "RBM5"
## [145] "BCAM" "HNRNPA0" "EIF6"
## [148] "MLF2" "ANXA4" "CFI"
## [151] "TSPAN13" "SCGB2A1" "CALU"
## [154] "JAK1" "MCM6" "LMBRD1"
## [157] "ELF3" "TOB1" "SMC4"
## [160] "HMGA1" "IDH2" "LRRC8D"
## [163] "USO1" "PFKP" "TYMS"
## [166] "KLK7" "ELOVL5" "ARPC5L"
## [169] "PODXL" "PDCD4" "TUBB3"
## [172] "FIBP" "SNRPC" "NR2F6"
## [175] "PPP4C" "ABHD11" "NDUFS8"
## [178] "PDIA4" "LRIG1" "SPTBN1"
## [181] "MRPS15" "GMPS" "HEXB"
## [184] "TPD52L2" "TSPYL1" "TES"
## [187] "SRRM1" "MMP7" "PPIF"
## [190] "COPE" "GAS1" "KRT7"
## [193] "MCM2" "TMEM9B" "ROCK1"
## [196] "TNFAIP2" "MYLK" "HEBP1"
## [199] "KLK6" "TPP1" "WSB1"
## [202] "KLK8" "PLS3" "FBXO9"
## [205] "PRNP" "RFC4" "SMARCC1"
## [208] "METTL7A" "NUSAP1" "IFI6"
## [211] "GNG12" "ARF5" "SLC2A1"
## [214] "CAST" "ZWINT" "DPYSL2"
## [217] "IL1R1" "SERINC1" "LIPA"
## [220] "CRIP2" "EIF4G1" "IK"
## [223] "RAB31" "PDLIM5" "HBB"
## [226] "IGBP1" "IL6ST" "ATP6V1D"
## [229] "HSD17B4" "GPRC5B" "FEZ2"
## [232] "RNF103" "SELENBP1" "CORO1B"
## [235] "LSR" "CLDN7" "RUSC1"
## [238] "CAB39" "FAM8A1" "BACE2"
## [241] "RALA" "TOR1AIP1" "RRBP1"
## [244] "SLC39A4" "ZBED5" "DCTD"
## [247] "RRM2" "EXOSC4" "SH3BP5"
## [250] "PSAT1" "RFXANK" "NUCB1"
## [253] "FBXL5" "SF3B4" "MCM3"
## [256] "TPBG" "ECHDC2" "PPP1R14B"
## [259] "MYO10" "GIPC1" "TJP2"
## [262] "PFDN4" "RASA1" "ST14"
## [265] "RDX" "SORT1" "FXYD3"
## [268] "ADRM1" "THY1" "MFAP2"
## [271] "HGS" "CCNB2" "SEC24C"
## [274] "KLHDC3" "MRPL12" "KLF10"
## [277] "HMGB3" "NR3C1" "TPX2"
## [280] "NUTF2" "CREBL2" "TACC1"
## [283] "DLG5" "MGP" "NBR1"
## [286] "SERPINB6" "UBAP2L" "RGS1"
## [289] "DBN1" "MCM7" "RGL1"
## [292] "RBL2" "GTF2IRD1" "TST"
## [295] "PREPL" "SCRIB" "BCL2L1"
## [298] "HBA2" "PCCB" "CD14"
## [301] "LIMA1" "CREB3L2" "ILVBL"
## [304] "ATG4B" "TERF2IP" "MAL"
## [307] "TRIM22" "MAP1LC3B" "PPP3CB"
## [310] "CDKN2A" "CENPF" "CALCOCO2"
## [313] "IMPA1" "POP7" "SPG11"
## [316] "CAPG" "ACAA2" "CCNE1"
## [319] "CRYZ" "MX2" "CLIC5"
## [322] "MRPL13" "PLD3" "ALG3"
## [325] "AGPAT2" "TUBA4A" "BCL7C"
## [328] "MRPL4" "FKBP11" "DDX49"
## [331] "FHL1" "CAV1" "PMP22"
## [334] "LANCL1" "AFF1" "STIP1"
## [337] "RALY" "ADAM9" "CDC20"
## [340] "MELK" "ATP6V0E2" "TPM2"
## [343] "DNAJB9" "EHF" "TOP2A"
## [346] "MICAL2" "UNC119B" "CCNB1"
## [349] "FUT8" "HEG1" "SLTM"
## [352] "UBL3" "DDX11" "PURA"
## [355] "LAGE3" "S100A2" "RACGAP1"
## [358] "SORD" "HSD17B11" "ISG20L2"
## [361] "MPHOSPH6" "EBP" "VWF"
## [364] "ELOVL1" "P4HA2" "PKIG"
## [367] "PNO1" "NME7" "PRSS8"
## [370] "TSPYL4" "STX12" "EHBP1"
## [373] "CTNNAL1" "S100A1" "FTO"
## [376] "CDH6" "INPPL1" "PSME4"
## [379] "S100A14" "TGFBR2" "UBE2S"
## [382] "MMP11" "KRT17" "BDH2"
## [385] "ACTL6A" "TXNDC15" "PRKDC"
## [388] "TMOD1" "NDUFB7" "ISOC1"
## [391] "SEC24B" "KIF2C" "PARVA"
## [394] "LXN" "CHD9" "LSM2"
## [397] "SKIL" "VPS13C" "RNF38"
## [400] "MAZ" "NEU1" "BGN"
## [403] "RAP1GAP" "TRIP13" "INHBB"
## [406] "RGS5" "CDCA4" "GOLM1"
## [409] "ZBTB20" "CD44" "RNASEH2A"
## [412] "EIF4EBP1" "MST1" "TNFRSF12A"
## [415] "DEFB1" "SLC43A3" "SOX9"
## [418] "ITGB8" "PRKACB" "CFDP1"
## [421] "SNX7" "BBS1" "SLC25A11"
## [424] "RHOT1" "NEK9" "BUB1B"
## [427] "EPB41L4B" "CERK" "DAB2"
## [430] "GRINA" "SNRK" "RRAS2"
## [433] "PAFAH1B3" "MTSS1" "PRPS1"
## [436] "FNDC3A" "ZNF395" "ECT2"
## [439] "CPE" "PRC1" "FARSA"
## [442] "PUS7" "SGCE" "DHRS7"
## [445] "ANXA3" "GINS1" "CLDN4"
## [448] "MRPS12" "MAD2L1" "ZNF93"
## [451] "SLC2A4RG" "SLC5A6" "SMG7"
## [454] "PRAME" "RANGAP1" "GCC2"
## [457] "MFN1" "RBM38" "APOA1"
## [460] "DAPK1" "RGS2" "NMU"
## [463] "EPB41L2" "TIAM1" "PDGFRA"
## [466] "ZNF593" "SEMA3C" "TMPRSS4"
## [469] "CDCA8" "KLK5" "LEPROTL1"
## [472] "MAP7D1" "AURKA" "TCEAL1"
## [475] "S100A8" "PKP3" "SYNE2"
## [478] "ACOT9" "KIFC1" "CRIM1"
## [481] "GTF2H1" "HSDL2" "HNMT"
## [484] "MAOB" "ESPL1" "TCTN1"
## [487] "JAG2" "POR" "RBM6"
## [490] "TBC1D9" "GALNT6" "EFEMP1"
## [493] "FBXW11" "OSBPL1A" "PTP4A3"
## [496] "MCM4" "LGALS8" "TARBP2"
## [499] "VCAM1" "MTUS1" "YKT6"
## [502] "FGF18" "AKAP11" "NOTCH3"
## [505] "AUH" "ZNF330" "TMEM47"
## [508] "GGH" "TGFBR3" "RARRES1"
## [511] "TNNT1" "RAPGEF2" "PER2"
## [514] "CLDN15" "GLDC" "PJA2"
## [517] "ATF1" "ANKMY2" "ASF1B"
## [520] "DTYMK" "GLS" "HSF1"
## [523] "SST" "NUP210" "CETN3"
## [526] "TK1" "FJX1" "SRD5A1"
## [529] "VSIG4" "OVOL2" "KIF20A"
## [532] "SMARCA2" "RCHY1" "FASN"
## [535] "RBPMS" "SAC3D1" "CTSO"
## [538] "NCAPD3" "TM7SF2" "CPVL"
## [541] "BIRC5" "NT5DC2" "TTC28"
## [544] "GSTM3" "RAB11FIP2" "HMMR"
## [547] "FKBP8" "CCDC85B" "TRAK2"
## [550] "RABGAP1L" "FBXW2" "PYCR1"
## [553] "NFIC" "OVGP1" "CCND2"
## [556] "TBC1D2B" "PLSCR4" "STXBP1"
## [559] "SDC1" "HP" "LAD1"
## [562] "SDC2" "GATA6" "PCDH7"
## [565] "FLAD1" "SLK" "EPS8"
## [568] "CLK1" "MEIS2" "GRHL2"
## [571] "CCDC28A" "ASPM" "DSE"
## [574] "GCA" "EFHC1" "PIK3R1"
## [577] "WDR37" "BNC2" "CEP55"
## [580] "NCAPD2" "AMIGO2" "LAMP3"
## [583] "CENPA" "TRAF5" "UCK2"
## [586] "MRPL2" "SLIT2" "VWA1"
## [589] "NEK2" "KLF2" "LOXL2"
## [592] "LMNB2" "SETX" "PLCB1"
## [595] "G0S2" "FZD7" "PTGIS"
## [598] "NDN" "TNFAIP8" "TROAP"
## [601] "TIMELESS" "SATB1" "TBRG4"
## [604] "EMP3" "HERC1" "FOXM1"
## [607] "BTAF1" "DDHD2" "ZBTB1"
## [610] "CLDN1" "C7" "ING3"
## [613] "RCBTB2" "RAP2C" "SCG5"
## [616] "TFAP2A" "COBL" "CDKN3"
## [619] "TSC1" "HOXC6" "SLC39A8"
## [622] "TRIB3" "ZDHHC13" "ST3GAL5"
## [625] "ATXN1" "ATAD2" "WNT5A"
## [628] "ERLIN2" "INPP1" "PRSS21"
## [631] "KIAA1109" "TTK" "ARMCX1"
## [634] "WDR19" "ZDHHC17" "HMGA2"
## [637] "ARHGEF10" "MPDZ" "MLXIP"
## [640] "PDZD2" "RPL39L" "MDFIC"
## [643] "LZTFL1" "GLIPR1" "LRRN2"
## [646] "PKN1" "TFPI2" "COL4A5"
## [649] "PAPSS2" "INSR" "GNG11"
## [652] "FRMD4B" "ZWILCH" "MPZL2"
## [655] "RASAL1" "MYBL2" "VPS13B"
## [658] "CDCA3" "FAS" "EFNB3"
## [661] "TACC3" "CEBPG" "COL9A2"
## [664] "S100PBP" "B3GAT3" "MKI67"
## [667] "HSPA2" "RAD51AP1" "PSRC1"
## [670] "GPSM2" "CHODL" "HERC2"
## [673] "STX2" "FANCI" "SECISBP2"
## [676] "PITPNC1" "DDR2" "BOP1"
## [679] "DHX29" "PYGL" "GALC"
## [682] "PKP2" "SESN1" "MBP"
## [685] "USP18" "CFH" "KIF13B"
## [688] "IGFBP6" "CBX7" "LAMB1"
## [691] "FYCO1" "SPAG9" "HTR3A"
## [694] "C3AR1" "BLM" "MAP3K5"
## [697] "STIL" "BEX1" "FZD1"
## [700] "OASL" "MAF" "SFMBT1"
## [703] "UPK1B" "PDLIM7" "PAK1IP1"
## [706] "GINS2" "HSPA12A" "ANKRA2"
## [709] "PBK" "PRKCA" "C6orf120"
## [712] "LMNB1" "C14orf132" "PDGFD"
## [715] "SORBS2" "CDC42EP3" "AGA"
## [718] "ETS2" "TBC1D4" "LIMCH1"
## [721] "ARMC8" "TRIM2" "ERCC5"
## [724] "KLF4" "SVEP1" "XPA"
## [727] "PRKAR2B" "PBX3" "ITCH"
## [730] "RECQL4" "BTN3A3" "RAB11FIP3"
## [733] "LRP8" "PALMD" "LRFN4"
## [736] "TNKS" "PROS1" "FLRT2"
## [739] "MAP4K1" "KIF4A" "SEMA5A"
## [742] "SNAI2" "BAK1" "PDLIM3"
## [745] "DTL" "POLQ" "AQP9"
## [748] "CCNA2" "IFT88" "GALNT12"
## [751] "WASF3" "NF1" "FBXL7"
## [754] "PDGFRL" "SFRP1" "EDNRA"
## [757] "SERPINB9" "CDC25A" "ID3"
## [760] "CBR4" "GPR137B" "PEX12"
## [763] "DOCK4" "HSPB8" "MAOA"
## [766] "MAN1A1" "ELOVL6" "SIRPA"
## [769] "TLE4" "MBNL3" "NCAPH"
## [772] "SPAG5" "AMPD3" "CAV2"
## [775] "ATF7IP2" "AHI1" "CRYL1"
## [778] "UPK3B" "ATP6V1B1" "PEG3"
## [781] "EZH2" "KLF11" "LAMA4"
## [784] "GATM" "CACNA1A" "RAB8B"
## [787] "DIAPH2" "ZNF211" "RGS4"
## [790] "RPS6KA2" "KIF11" "DNMT3B"
## [793] "POLE2" "GSPT2" "IRS1"
## [796] "FZD10" "CADPS2" "RAD51"
## [799] "SAMD4A" "MEF2C" "CENPN"
## [802] "COLEC12" "FGL2" "MEGF9"
## [805] "FBP1" "WDR7" "VLDLR"
## [808] "KIF22" "RAB38" "SLC46A3"
## [811] "TBP" "RAD54L" "ZFPM2"
## [814] "ALDH1A3" "SLC31A2" "BMP4"
## [817] "PPP1R12B" "FBXO3" "FLNC"
## [820] "COMP" "EPHB6" "PCDH17"
## [823] "CX3CR1" "DTNA" "CDT1"
## [826] "CFD" "NELL2" "BACH1"
## [829] "NDC80" "INSIG2" "NAT1"
## [832] "YTHDC2" "PLA2G2A" "SNCA"
## [835] "SULT1C2" "AGR2" "ZNF175"
## [838] "BUB1" "KCNJ2" "PPWD1"
## [841] "ADCY9" "PKD2" "BBS4"
## [844] "CDC7" "DPYD" "FGF9"
## [847] "KIF14" "WDR47" "NT5E"
## [850] "USP12" "PIGB" "SOBP"
## [853] "GALNT10" "RHOBTB1" "AKT3"
## [856] "ECM2" "NR2F1" "PTPRE"
## [859] "L1CAM" "GNAI1" "PPP1R3C"
## [862] "GRAMD1C" "EXO1" "MITF"
## [865] "PCCA" "PPM1D" "OLFML1"
## [868] "PTH2R" "DYNC2H1" "TRIM68"
## [871] "SNCAIP" "CDON" "FRAS1"
## [874] "CLIP4" "CAND2" "GFPT2"
## [877] "WDR44" "REEP1" "C4BPA"
## [880] "ALDH1A2" "HDAC4" "CCDC93"
## [883] "BAMBI" "FLRT3" "SPOCK1"
## [886] "CENPE" "RBMS3" "KIF15"
## [889] "FCGR3B" "TRO" "CASD1"
## [892] "SEMA6D" "ZNF45" "E2F8"
## [895] "DEPDC1" "AOX1" "GCNT1"
## [898] "PTGER4" "MCM10" "OIP5"
## [901] "DPY19L1" "PTCH1" "PLCL2"
## [904] "THRA" "ME1" "CHEK1"
## [907] "EPHX2" "ITGAM" "RNASE4"
## [910] "ATP10D" "ZEB1" "TSPAN8"
## [913] "CALB2" "PDK4" "NCAPG"
## [916] "EVI5" "LRRC49" "ZNF226"
## [919] "FBXW7" "PSD3" "MCTP2"
## [922] "HSD11B1" "TSPAN5" "ALDH1A1"
## [925] "DIXDC1" "ULK2" "TBC1D12"
## [928] "NAV3" "HOXA5" "PDPN"
## [931] "LYVE1" "TRPC1" "GNE"
## [934] "KCNJ8" "THBD" "FGF13"
## [937] "MCOLN3" "RORA" "MNDA"
## [940] "MARCO" "NRXN3" "KIF23"
## [943] "ZNF671" "ANG" "OSTM1"
## [946] "CSF2RB" "TNXB" "DOCK5"
## [949] "FGF1" "CYP3A5" "PLAC8"
## [952] "GALK2" "GREB1" "KDR"
## [955] "PRKD1" "ZNF573" "CAPN3"
## [958] "WNT2B" "GPRASP1" "PACRG"
## [961] "TCEAL2" "CPEB1" "SYNE1"
## [964] "ABCA5" "TCF21" "FRY"
## [967] "LAMA2" "CORO2B" "DMD"
## [970] "SMPDL3A" "PTGER3" "MAP3K8"
## [973] "PLA2G4A" "SLC30A4" "SOCS2"
## [976] "DIRAS3" "PCSK5" "PLCE1"
## [979] "NLGN4X" "KLRB1" "ABCG2"
## [982] "PDE8B" "PGR" "RAB27B"
## [985] "KCNB1" "EFCAB1" "IL18"
## [988] "HBD" "ZBTB16" "PROCR"
## [991] "PTPN13" "PDE7B" "RTN1"
## [994] "SERPINA6" "P2RX7" "DNAH9"
## [997] "SPC25" "NR3C2" "VNN2"
## [1000] "NAP1L3" "GSTA3" "EPB41L3"
## [1003] "C6" "ADH1B" "HAS1"
## [1006] "EPB42" "GPM6A" "ADRB2"
## [1009] "C21orf62" "CLEC4M" "FCN1"
## [1012] "BMP2" "CYP39A1" "TFPI"
## [1015] "NANOG" "BNC1" "DCLK1"
## [1018] "HSD17B6" "HLF" "LGALS2"
## [1021] "ABCA8" "PIP5K1B" "PCDH9"
## [1024] "NXF3" "SGCG" "CLEC10A"
## [1027] "DSC3" "CCDC81" "PCLO"
## [1030] "KBTBD11" "GHR" "SLITRK5"
## [1033] "PID1" "ARHGAP6" "NBEA"
## [1036] "SCTR" "CCDC69" "VGLL3"
## [1039] "OGN" "ADAMTS3" "CXCL6"
## [1042] "LHX2" "CHGB" "SPAG6"
## [1045] "NAP1L2" "SOX3" "NR0B1"
## [1048] "DNAI2" "AGBL2" "OMD"
## [1051] "PCOLCE2" "COL21A1" "HSD17B2"
## [1054] "ASTN1" "CRISP3" "ADH1C"
## [1057] "BCHE" "RNF128" "DPYS"
## [1060] "RUNDC3B" "ABCB1" "NPY1R"
## [1063] "IL7" "PRG4" "PTGDR"
## [1066] "GIPC2" "CRISP2" "PTPRZ1"
## [1069] "MEOX2"
head(OV_TCGA_GSE)[, 7:12]
## correlation_batch GAPDH TUBA1B TUBA1C
## TCGA-29-1692-01B-01R-0808-01 1.0000000 12.58758 11.80367 11.75346
## TCGA-13-1488-01A-01R-0477-01 0.9141113 12.68406 11.78490 11.91617
## TCGA-13-1483-01A-01R-0477-01 0.9194885 12.58758 11.16848 11.15961
## TCGA-24-2024-01A-02R-0668-01 0.9145925 12.49987 11.62928 11.55965
## TCGA-13-0908-01B-01R-0453-01 0.9069363 12.46622 11.47308 11.57144
## TCGA-04-1341-01A-01R-0434-01 0.8990117 12.75124 11.90759 12.04805
## RPL18A COX6C
## TCGA-29-1692-01B-01R-0808-01 11.47641 11.08334
## TCGA-13-1488-01A-01R-0477-01 10.88576 11.63995
## TCGA-13-1483-01A-01R-0477-01 11.18119 11.21686
## TCGA-24-2024-01A-02R-0668-01 11.55521 10.57146
## TCGA-13-0908-01B-01R-0453-01 10.83822 12.06827
## TCGA-04-1341-01A-01R-0434-01 11.65504 10.84748
covariates<- gsub("-", ".",covariates)
colnames(OV_TCGA_GSE)[-c(1:7)]<- gsub("-", ".",colnames(OV_TCGA_GSE)[-c(1:7)])
#############################
# Univariate Cox regression##
#############################
univ_formulas <- sapply(covariates,
function(x) as.formula(paste('Surv(OV_TCGA_GSE$time, OV_TCGA_GSE$vital_status=="Dead")~', x)))
univ_models <- lapply( univ_formulas, function(x){coxph(x, data = OV_TCGA_GSE)})
## Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
## Loglik converged before variable 1 ; coefficient may be infinite.
## Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
## Loglik converged before variable 1 ; coefficient may be infinite.
cox<- summary(univ_models)
# Extract data
univ_results <- lapply(univ_models,
function(x){
x <- summary(x)
p.value<-signif(x$wald["pvalue"], digits=2)
wald.test<-signif(x$wald["test"], digits=2)
beta<-signif(x$coef[1], digits=2);#coeficient beta
HR <-signif(x$coef[2], digits=2);#exp(beta)
HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
HR <- paste0(HR, " (",
HR.confint.lower, "-", HR.confint.upper, ")")
res<-c(beta, HR, wald.test, p.value)
names(res)<-c("beta", "HR (95% CI for HR)", "wald.test",
"p.value")
return(res)
#return(exp(cbind(coef(x),confint(x))))
})
res <- t(as.data.frame(univ_results, check.names = FALSE))
res<- data.frame(res)
res$p.value<- as.numeric(res$p.value)
res<- res[order(res$p.value, decreasing = F),]
res_pvalue_TCGA_GSE<-res[res$p.value<0.05,]
str(res_pvalue_TCGA_GSE)
## 'data.frame': 237 obs. of 4 variables:
## $ beta : chr "0.33" "0.28" "0.4" "0.18" ...
## $ HR..95..CI.for.HR.: chr "1.4 (1.2-1.6)" "1.3 (1.2-1.5)" "1.5 (1.3-1.8)" "1.2 (1.1-1.3)" ...
## $ wald.test : chr "31" "23" "22" "22" ...
## $ p.value : num 2.4e-08 1.6e-06 2.5e-06 2.8e-06 6.4e-06 1.0e-05 1.9e-05 3.1e-05 3.9e-05 4.1e-05 ...
res_pvalue_TCGA_GSE$wald.test<- as.numeric(res_pvalue_TCGA_GSE$wald.test)
res_pvalue_TCGA_GSE$HR<- res_pvalue_TCGA_GSE$HR..95..CI.for.HR.
res_pvalue_TCGA_GSE$HR<- gsub("[(].*", "",res_pvalue_TCGA_GSE$HR)
res_pvalue_TCGA_GSE$HR<- as.numeric(res_pvalue_TCGA_GSE$HR)
res_pvalue_TCGA_GSE_HR<- res_pvalue_TCGA_GSE[(res_pvalue_TCGA_GSE$HR>1.1 | res_pvalue_TCGA_GSE$HR<0.9), ]
############################################################################
#Univariate significant genes obtained from univ analysis of cox regression#
############################################################################
KMgenes_TCGA_GSE<- rownames(res_pvalue_TCGA_GSE_HR)
KMgenes_TCGA_GSE # Univariates significant genes obtained from 5 datasets
## [1] "GALNT10" "FZD1" "RHOT1" "EDNRA" "RPS6KA2" "GFPT2"
## [7] "ZEB1" "ASAH1" "ECM2" "BTN3A3" "PDGFRA" "PMP22"
## [13] "PDPN" "GALNT6" "WT1" "PDIA4" "PDLIM3" "DAB2"
## [19] "PTGER3" "OMD" "SVEP1" "FJX1" "GSTP1" "PPP1R12B"
## [25] "NUCB1" "CPEB1" "KLF2" "LYVE1" "SNRPC" "DSE"
## [31] "MCM3" "CERK" "PCDH9" "ARF5" "EXOSC4" "STXBP1"
## [37] "STX12" "MRPL4" "MBNL3" "LOXL2" "FLNC" "GNG12"
## [43] "RBMS3" "PIK3R1" "KIF22" "MITF" "FANCI" "SHMT2"
## [49] "OLFML1" "FUT8" "MTSS1" "ZNF330" "SYNGR2" "PTGDR"
## [55] "USP18" "MEOX2" "PUF60" "FAM8A1" "DHRS7" "HSD17B6"
## [61] "PDLIM5" "BACE2" "GREB1" "DDR2" "PKD2" "RASA1"
## [67] "B4GALT5" "PFN1" "DDHD2" "UCP2" "BMP4" "MRPL12"
## [73] "CBX7" "UBL3" "SCTR" "TJP2" "P2RX7" "AKT3"
## [79] "GNE" "S100A13" "GPRC5B" "GRAMD1C" "RPL18A" "HMGB3"
## [85] "TIAM1" "TMED10" "OSTM1" "CTSA" "ADRB2" "BMP2"
## [91] "CCT5" "CCDC85B" "VGLL3" "THBD" "PDLIM7" "DIAPH2"
## [97] "FARSA" "ST14" "PYCR1" "CAND2" "EPB41L3" "SERPINB6"
## [103] "BCL7C" "RALA" "OVOL2" "GINS2" "PPP4C" "NXF3"
## [109] "RFXANK" "SAC3D1" "ARHGEF10" "SNRK" "GCNT1" "RNF38"
## [115] "FGF1" "BAK1" "INPP1" "MCTP2" "LSM4" "TMEM9B"
## [121] "NF1" "CALR"
dim(OV_TCGA_GSE) #982 samples and 1069 genes
## [1] 982 1069
# From here start!!!!
head(OV_TCGA_GSE)[, 1:10]
## shortLetterCode description
## TCGA-29-1692-01B-01R-0808-01 TP Serous cystadenocarcinoma, NOS
## TCGA-13-1488-01A-01R-0477-01 TP Serous cystadenocarcinoma, NOS
## TCGA-13-1483-01A-01R-0477-01 TP Serous cystadenocarcinoma, NOS
## TCGA-24-2024-01A-02R-0668-01 TP Serous cystadenocarcinoma, NOS
## TCGA-13-0908-01B-01R-0453-01 TP Serous cystadenocarcinoma, NOS
## TCGA-04-1341-01A-01R-0434-01 TP Serous cystadenocarcinoma, NOS
## time vital_status batch correlation
## TCGA-29-1692-01B-01R-0808-01 2872 Dead TCGA 1.0000000
## TCGA-13-1488-01A-01R-0477-01 2154 Dead TCGA 0.9237833
## TCGA-13-1483-01A-01R-0477-01 895 Dead TCGA 0.9287817
## TCGA-24-2024-01A-02R-0668-01 1769 Dead TCGA 0.9244550
## TCGA-13-0908-01B-01R-0453-01 1843 Dead TCGA 0.9168311
## TCGA-04-1341-01A-01R-0434-01 33 Alive TCGA 0.9105627
## correlation_batch GAPDH TUBA1B TUBA1C
## TCGA-29-1692-01B-01R-0808-01 1.0000000 12.58758 11.80367 11.75346
## TCGA-13-1488-01A-01R-0477-01 0.9141113 12.68406 11.78490 11.91617
## TCGA-13-1483-01A-01R-0477-01 0.9194885 12.58758 11.16848 11.15961
## TCGA-24-2024-01A-02R-0668-01 0.9145925 12.49987 11.62928 11.55965
## TCGA-13-0908-01B-01R-0453-01 0.9069363 12.46622 11.47308 11.57144
## TCGA-04-1341-01A-01R-0434-01 0.8990117 12.75124 11.90759 12.04805
set.seed(123) # set.seed(123). This is used in the paper
trainingdata.sample<- sample(rownames(OV_TCGA_GSE), size=round(length(rownames(OV_TCGA_GSE))*0.5,0)) # training sample
testdata.sample<- rownames(OV_TCGA_GSE)[!rownames(OV_TCGA_GSE)%in%trainingdata.sample] # test sample
set.seed(123)
cv_glmnet_cox_1<-cv.glmnet(x= as.matrix(OV_TCGA_GSE[trainingdata.sample,KMgenes_TCGA_GSE]), y= Surv(OV_TCGA_GSE[trainingdata.sample,"time"],
OV_TCGA_GSE[trainingdata.sample,]$vital_status=="Dead"), family= "cox", nfolds = 10,
alpha=1
)
set.seed(123)
cv_glmnet_cox_.5<-cv.glmnet(x= as.matrix(OV_TCGA_GSE[trainingdata.sample,KMgenes_TCGA_GSE]), y= Surv(OV_TCGA_GSE[trainingdata.sample,"time"],
OV_TCGA_GSE[trainingdata.sample,]$vital_status=="Dead"), family= "cox", nfolds = 10,
alpha=0.5
)
set.seed(123)
cv_glmnet_cox_0<-cv.glmnet(x= as.matrix(OV_TCGA_GSE[trainingdata.sample,KMgenes_TCGA_GSE]), y= Surv(OV_TCGA_GSE[trainingdata.sample,"time"],
OV_TCGA_GSE[trainingdata.sample,]$vital_status=="Dead"), family= "cox", nfolds = 10,
alpha=0
)
par(mfrow=c(2,2))
plot(cv_glmnet_cox_1); plot(cv_glmnet_cox_.5); plot(cv_glmnet_cox_0)
plot(log(cv_glmnet_cox_1$lambda), cv_glmnet_cox_1$cvm, pch=19, col= "red", xlab= "Log(L)")
points(log(cv_glmnet_cox_.5$lambda), cv_glmnet_cox_.5$cvm, pch=19, col= "grey", xlab= "Log(L)")
points(log(cv_glmnet_cox_0$lambda), cv_glmnet_cox_0$cvm, pch=19, col= "blue", xlab= "Log(L)")
legend("topright", legend= c("a=1", "a=0.5", "a=0"), col= c("red", "grey", "blue"), pch= 19,
x.intersp = .75, y.intersp = .75)

##############################################
#chosing the best fit model; alpha=1; LASSO)#
#############################################
cv_glmnet_cox<- cv_glmnet_cox_1
par(mfrow=c(1,1))
plot(cv_glmnet_cox)

plot(cv_glmnet_cox$glmnet.fit, s= cv_glmnet_cox$lambda.min, label=T)

#saveRDS(cv_glmnet_cox, file="cv_glmnet_cox_survival.rds")
coefvalues<-coefficients(cv_glmnet_cox, s= cv_glmnet_cox$lambda.min)
coefvalues<- as.matrix(coefvalues)
coefvalues<-as.data.frame(coefvalues[coefvalues[,1]!=0,])
colnames(coefvalues)<- "cv.lamda.min"
rownames(coefvalues) #
## [1] "RHOT1" "EDNRA" "RPS6KA2" "GFPT2" "ASAH1" "BTN3A3" "WT1"
## [8] "NUCB1" "LYVE1" "PIK3R1" "ZNF330" "FAM8A1" "BACE2" "GREB1"
## [15] "RASA1" "SCTR" "P2RX7" "TMED10" "DIAPH2" "INPP1"
coefvalues # coefficients along with gene
## cv.lamda.min
## RHOT1 0.178893259
## EDNRA 0.074975099
## RPS6KA2 0.145896482
## GFPT2 0.029641155
## ASAH1 0.108966294
## BTN3A3 -0.100227494
## WT1 -0.007357848
## NUCB1 0.037373759
## LYVE1 0.020082208
## PIK3R1 0.014898043
## ZNF330 -0.007485032
## FAM8A1 -0.045531972
## BACE2 -0.004701693
## GREB1 -0.016633823
## RASA1 0.104274856
## SCTR -0.040192606
## P2RX7 -0.090735836
## TMED10 -0.218236008
## DIAPH2 -0.082572791
## INPP1 -0.059699597
#write.csv(coefvalues, file= "LASSO_coef.csv")
# Check the prediction power for both variables(obtained from cv_glmnet)
#and coxgenes_merged (obtained via cox regression analysis)
mPS.Score<- predict(cv_glmnet_cox,s=c(0,cv_glmnet_cox$lambda.min, cv_glmnet_cox$lambda.1se),
newx = as.matrix(OV_TCGA_GSE[ , KMgenes_TCGA_GSE]), type= "link")
colnames(mPS.Score)<- c("lamda.all", "lamda.min", "lamda.1se")
head(mPS.Score)
## lamda.all lamda.min lamda.1se
## TCGA-29-1692-01B-01R-0808-01 -18.14502 0.07776236 0
## TCGA-13-1488-01A-01R-0477-01 -15.44010 0.09313776 0
## TCGA-13-1483-01A-01R-0477-01 -14.88338 0.25075725 0
## TCGA-24-2024-01A-02R-0668-01 -17.71287 0.10741438 0
## TCGA-13-0908-01B-01R-0453-01 -16.44914 0.23766284 0
## TCGA-04-1341-01A-01R-0434-01 -16.20803 0.48304893 0
OV.Surv_Score<-cbind.data.frame(OV_TCGA_GSE[,c(1:7)],mPS.Score)
plot(mPS.Score[,1], mPS.Score[,2])

#################################################
# mPS stratification based on High vs Low score #
#################################################
Q<-quantile(OV.Surv_Score$lamda.min)
Q
## 0% 25% 50% 75% 100%
## -0.52734240 0.08926057 0.25029223 0.41508097 1.17822658
OV.Surv_Score$lamda.min_Q<- factor(ifelse(OV.Surv_Score$lamda.min>Q[4],"Q4",
ifelse(OV.Surv_Score$lamda.min>Q[3],"Q3",
ifelse(OV.Surv_Score$lamda.min>Q[2],"Q2",
"Q1"))))
OV.Surv_Score$lamda.min_QHL<- factor(ifelse(OV.Surv_Score$lamda.min>Q[3],"High",
ifelse(OV.Surv_Score$lamda.min<Q[3],"Low",
NA)))
OV.Surv_Score$lamda.min_QHL<- relevel(OV.Surv_Score$lamda.min_QHL, ref= "Low")
par(mfrow=c(1,2))
#lamda.min_training
OV_pred<-survivalROC(Stime=OV.Surv_Score$time[rownames(OV.Surv_Score)%in%trainingdata.sample],
status= as.numeric(OV.Surv_Score$vital_status=="Dead")[rownames(OV.Surv_Score)%in%trainingdata.sample],
marker=OV.Surv_Score$lamda.min[rownames(OV.Surv_Score)%in%trainingdata.sample],
predict.time = 3*365, method= "KM"
)
plot(OV_pred$FP, OV_pred$TP, type="l", col= "red" ,xlim=c(0,1), ylim=c(0,1),
xlab=paste( "FP", "\n", "AUC = ",round(OV_pred$AUC,3)),
ylab="TP",main="mPS_lamda.all, Method = KM \n Year = 3, Sample= Trainingdata")
abline(0,1)
#lamda.min_test
OV_pred<-survivalROC(Stime=OV.Surv_Score$time[rownames(OV.Surv_Score)%in%testdata.sample],
status= as.numeric(OV.Surv_Score$vital_status=="Dead")[rownames(OV.Surv_Score)%in%testdata.sample],
marker=OV.Surv_Score$lamda.min[rownames(OV.Surv_Score)%in%testdata.sample],
predict.time = 3*365, method= "KM"
)
plot(OV_pred$FP, OV_pred$TP, type="l", col= "red" ,xlim=c(0,1), ylim=c(0,1),
xlab=paste( "FP", "\n", "AUC = ",round(OV_pred$AUC,3)),
ylab="TP",main="mPS_lamda.all, Method = KM \n Year = 3, Sample= Testdata")
abline(0,1)

# multiple line graphs
#lamda.min_Trainingdata
par(mfrow= c(1,2))
my.colors<- c("black", "violet", "blue", "green", "red")
set.seed(123)
OV_pred<-list(
"1"="",
"2"="",
"3"="",
"4"="",
"5"=""
)
for (i in 1:5) {
OV_pred[[i]]<-survivalROC(Stime=OV.Surv_Score$time[rownames(OV.Surv_Score)%in%trainingdata.sample],
status= as.numeric(OV.Surv_Score$vital_status=="Dead")[rownames(OV.Surv_Score)%in%trainingdata.sample],
marker=OV.Surv_Score$lamda.min[rownames(OV.Surv_Score)%in%trainingdata.sample],
predict.time = i*365, method= "KM"
)}
plot(OV_pred[[1]]$FP, OV_pred[[1]]$TP, type="l", col= "black" ,xlim=c(0,1), ylim=c(0,1),
xlab=paste( "FP"),
ylab="TP",main="TrainingData")
abline(0,1)
for (i in 2:5) {lines(OV_pred[[i]]$FP, OV_pred[[i]]$TP, type="l", col= my.colors[i] ,xlim=c(0,1), ylim=c(0,1))
}
txt<- c(round(OV_pred[[1]]$AUC,2),round(OV_pred[[2]]$AUC,2),round(OV_pred[[3]]$AUC,2),round(OV_pred[[4]]$AUC,2),
round(OV_pred[[5]]$AUC,2))
legend("bottomright", legend= c(1:5, txt), col= my.colors, lty=1, ncol=2, cex=0.75, title=paste0("Year", " AUC"),
y.intersp=0.75,x.intersp = 0)
#for testdata
my.colors<- c("black", "violet", "blue", "green", "red")
set.seed(123)
OV_pred<-list(
"1"="",
"2"="",
"3"="",
"4"="",
"5"=""
)
for (i in 1:5) {
OV_pred[[i]]<-survivalROC(Stime=OV.Surv_Score$time[rownames(OV.Surv_Score)%in%testdata.sample],
status= as.numeric(OV.Surv_Score$vital_status=="Dead")[rownames(OV.Surv_Score)%in%testdata.sample],
marker=OV.Surv_Score$lamda.min[rownames(OV.Surv_Score)%in%testdata.sample],
predict.time = i*365, method= "KM"
)}
plot(OV_pred[[1]]$FP, OV_pred[[1]]$TP, type="l", col= "black" ,xlim=c(0,1), ylim=c(0,1),
xlab=paste( "FP"),
ylab="TP",main="TestData")
abline(0,1)
for (i in 2:5) {lines(OV_pred[[i]]$FP, OV_pred[[i]]$TP, type="l", col= my.colors[i] ,xlim=c(0,1), ylim=c(0,1))
}
txt<- c(round(OV_pred[[1]]$AUC,2),round(OV_pred[[2]]$AUC,2),round(OV_pred[[3]]$AUC,2),round(OV_pred[[4]]$AUC,2),
round(OV_pred[[5]]$AUC,2))
legend("bottomright", legend= c(1:5, txt), col= my.colors, ncol=2,lty=1, cex=0.75, title=paste0("Year", " AUC"),
y.intersp=0.75,x.intersp = 0)

##########################
#Survival Plot#
########################
#only for trainingdata
OV.Surv_Score_Training<- OV.Surv_Score[rownames(OV.Surv_Score)%in%trainingdata.sample,]
Q<-quantile(OV.Surv_Score_Training$lamda.min)
Q
## 0% 25% 50% 75% 100%
## -0.52734240 0.08691849 0.24052934 0.41042759 1.17822658
OV.Surv_Score_Training$lamda.min_Q<- factor(ifelse(OV.Surv_Score_Training$lamda.min>Q[4],"Q4",
ifelse(OV.Surv_Score_Training$lamda.min>Q[3],"Q3",
ifelse(OV.Surv_Score_Training$lamda.min>Q[2],"Q2",
"Q1"))))
OV.Surv_Score_Training$lamda.min_QHL<- factor(ifelse(OV.Surv_Score_Training$lamda.min>Q[3],"High",
ifelse(OV.Surv_Score_Training$lamda.min<Q[3],"Low",
NA)))
fit_surv<- surv_fit(Surv(OV.Surv_Score_Training$time, OV.Surv_Score_Training$vital_status=="Dead")~ lamda.min_QHL,
data=OV.Surv_Score_Training)
fig4a<-ggsurvplot(fit_surv, conf.int=FALSE, pval=TRUE, risk.table=TRUE,
palette = c("red", "blue"),
legend.title="mPSHvsL",
legend.labs= levels(OV.Surv_Score_Training$lamda.min_QHL),
test.for.trend = F,
surv.median.line = "hv",
title="KM_TrainingData"
)
# Four grps Q1>25% cut off; Q2=25-50%, Q3=50-75%, Q4<75%
fit_surv<- surv_fit(Surv(OV.Surv_Score_Training$time, OV.Surv_Score_Training$vital_status=="Dead")~ lamda.min_Q,
data=OV.Surv_Score_Training)
fig4b<-ggsurvplot(fit_surv, conf.int=FALSE, pval=TRUE, risk.table=TRUE,
palette = c("black", "blue", "green" ,"red"),
legend.title="mPSQ",
legend.labs= levels(OV.Surv_Score_Training$lamda.min_Q),
test.for.trend = F,
surv.median.line = "hv",
title="KM_TrainingData"
)
#for testdata
OV.Surv_Score_Test<- OV.Surv_Score[rownames(OV_TCGA_GSE)%in%testdata.sample,]
Q<-quantile(OV.Surv_Score_Test$lamda.min)
Q
## 0% 25% 50% 75% 100%
## -0.52426506 0.09061088 0.25978471 0.41631065 0.95928724
OV.Surv_Score_Test$lamda.min_Q<- factor(ifelse(OV.Surv_Score_Test$lamda.min>Q[4],"Q4",
ifelse(OV.Surv_Score_Test$lamda.min>Q[3],"Q3",
ifelse(OV.Surv_Score_Test$lamda.min>Q[2],"Q2",
"Q1"))))
OV.Surv_Score_Test$lamda.min_QHL<- factor(ifelse(OV.Surv_Score_Test$lamda.min>Q[3],"High",
ifelse(OV.Surv_Score_Test$lamda.min<Q[3],"Low",
NA)))
fit_surv<- surv_fit(Surv(OV.Surv_Score_Test$time, OV.Surv_Score_Test$vital_status=="Dead")~ lamda.min_QHL,
data=OV.Surv_Score_Test)
fig4c<-ggsurvplot(fit_surv, conf.int=FALSE, pval=TRUE, risk.table=TRUE,
palette = c("red", "blue"),
legend.title="mPSHvsL",
legend.labs= levels(OV.Surv_Score_Test$lamda.min_QHL),
test.for.trend = F,
surv.median.line = "hv",
title="KM_TestData"
)
# Four grps Q1>25% cut off; Q2=25-50%, Q3=50-75%, Q4<75%
fit_surv<- surv_fit(Surv(OV.Surv_Score_Test$time, OV.Surv_Score_Test$vital_status=="Dead")~ lamda.min_Q,
data=OV.Surv_Score_Test)
fig4d<-ggsurvplot(fit_surv, conf.int=FALSE, pval=TRUE, risk.table=TRUE,
palette = c("black", "blue", "green" ,"red"),
legend.title="mPSQ",
legend.labs= levels(OV.Surv_Score_Test$lamda.min_Q),
test.for.trend = F,
surv.median.line = "hv",
title="KM_TestData"
)
fig.ggplots<- ggarrange(fig4a$plot,fig4b$plot,fig4c$plot,fig4d$plot,
labels= c("(A)", "(B)", "(C)", "(D)"),
ncol=2, nrow=2)
fig.ggplots

#
head(OV_TCGA_GSE)[, 1:10]
## shortLetterCode description
## TCGA-29-1692-01B-01R-0808-01 TP Serous cystadenocarcinoma, NOS
## TCGA-13-1488-01A-01R-0477-01 TP Serous cystadenocarcinoma, NOS
## TCGA-13-1483-01A-01R-0477-01 TP Serous cystadenocarcinoma, NOS
## TCGA-24-2024-01A-02R-0668-01 TP Serous cystadenocarcinoma, NOS
## TCGA-13-0908-01B-01R-0453-01 TP Serous cystadenocarcinoma, NOS
## TCGA-04-1341-01A-01R-0434-01 TP Serous cystadenocarcinoma, NOS
## time vital_status batch correlation
## TCGA-29-1692-01B-01R-0808-01 2872 Dead TCGA 1.0000000
## TCGA-13-1488-01A-01R-0477-01 2154 Dead TCGA 0.9237833
## TCGA-13-1483-01A-01R-0477-01 895 Dead TCGA 0.9287817
## TCGA-24-2024-01A-02R-0668-01 1769 Dead TCGA 0.9244550
## TCGA-13-0908-01B-01R-0453-01 1843 Dead TCGA 0.9168311
## TCGA-04-1341-01A-01R-0434-01 33 Alive TCGA 0.9105627
## correlation_batch GAPDH TUBA1B TUBA1C
## TCGA-29-1692-01B-01R-0808-01 1.0000000 12.58758 11.80367 11.75346
## TCGA-13-1488-01A-01R-0477-01 0.9141113 12.68406 11.78490 11.91617
## TCGA-13-1483-01A-01R-0477-01 0.9194885 12.58758 11.16848 11.15961
## TCGA-24-2024-01A-02R-0668-01 0.9145925 12.49987 11.62928 11.55965
## TCGA-13-0908-01B-01R-0453-01 0.9069363 12.46622 11.47308 11.57144
## TCGA-04-1341-01A-01R-0434-01 0.8990117 12.75124 11.90759 12.04805
m<- match(rownames(OV_TCGA_GSE), rownames(OV.Surv_Score)) # perfect match
# perfect match
OV.Surv_Score_Training$lamda.min_QHL<- relevel(OV.Surv_Score_Training$lamda.min_QHL, ref= "Low")
coefvalues$x<- c(1:nrow(coefvalues))
coefvalues<-coefvalues[order(coefvalues$cv.lamda.min, decreasing = T),]
coefvalues$x<- c(1:nrow(coefvalues))
dim(OV_TCGA_GSE)
## [1] 982 1069
head(OV_TCGA_GSE)[, 7:10]
## correlation_batch GAPDH TUBA1B TUBA1C
## TCGA-29-1692-01B-01R-0808-01 1.0000000 12.58758 11.80367 11.75346
## TCGA-13-1488-01A-01R-0477-01 0.9141113 12.68406 11.78490 11.91617
## TCGA-13-1483-01A-01R-0477-01 0.9194885 12.58758 11.16848 11.15961
## TCGA-24-2024-01A-02R-0668-01 0.9145925 12.49987 11.62928 11.55965
## TCGA-13-0908-01B-01R-0453-01 0.9069363 12.46622 11.47308 11.57144
## TCGA-04-1341-01A-01R-0434-01 0.8990117 12.75124 11.90759 12.04805
y<-OV_TCGA_GSE[,rownames(coefvalues)] # data with only geneids assocaited with lamda.min
dim(y)
## [1] 982 20
y.training<- y[rownames(OV.Surv_Score_Training),] # training samples
y.test<-y[rownames(OV.Surv_Score_Test),]
head(y.training)[, 1:5]
## RHOT1 RPS6KA2 ASAH1 RASA1 EDNRA
## TCGA-13-0908-01B-01R-0453-01 6.796971 5.097088 9.301658 7.245542 6.463844
## TCGA-04-1341-01A-01R-0434-01 7.361495 5.813764 8.703696 7.489507 5.687678
## TCGA-36-1575-01A-01R-0538-01 7.005700 5.638665 8.536276 7.886416 6.001421
## TCGA-25-2408-01A-01R-0709-01 7.232662 6.724524 9.411316 8.709715 7.433551
## TCGA-25-1877-01A-01R-0653-01 7.571181 4.747518 9.620693 8.018625 7.404528
## TCGA-61-2094-01A-01R-0668-01 6.471896 5.779449 9.734551 7.891222 5.576870
match(rownames(y.training), rownames(OV.Surv_Score_Training))
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## [307] 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
## [325] 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## [343] 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## [361] 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## [379] 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## [397] 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
## [415] 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
## [433] 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
## [451] 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
## [469] 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
## [487] 487 488 489 490 491
match(rownames(y.test), rownames(OV.Surv_Score_Test))
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## [307] 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
## [325] 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## [343] 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## [361] 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## [379] 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## [397] 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
## [415] 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
## [433] 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
## [451] 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
## [469] 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
## [487] 487 488 489 490 491
y.training<- cbind(y.training,OV.Surv_Score_Training$lamda.min,OV.Surv_Score_Training$lamda.min_QHL, OV.Surv_Score_Training$lamda.min_Q)
y.test<- cbind(y.test,OV.Surv_Score_Test$lamda.min,OV.Surv_Score_Test$lamda.min_QHL, OV.Surv_Score_Test$lamda.min_Q)
ncol(y.training)
## [1] 23
ncol(y.test)
## [1] 23
x<- ncol(y.training)-3
x.test<- ncol(y.test)-3
colnames(y.training)[-c(1:x)] <-c("mPS.lamda.min", "mPS_QHL", "mPS_Q")
colnames(y.test)[-c(1:x.test)] <-c("mPS.lamda.min", "mPS_QHL", "mPS_Q")
y.training<- y.training[order(y.training[,"mPS.lamda.min"], decreasing = T),]
y.training[,"mPS.lamda.min"]
## [1] 1.178226583 0.974262535 0.962140793 0.955170687 0.953000659
## [6] 0.866465544 0.863709849 0.821080885 0.805024397 0.803373978
## [11] 0.800112033 0.800088426 0.789026058 0.784576709 0.758187269
## [16] 0.748624185 0.748519927 0.744531149 0.741834435 0.730623952
## [21] 0.730499363 0.728757392 0.726902549 0.715452334 0.693374710
## [26] 0.667811918 0.664971602 0.660187858 0.655550204 0.647482533
## [31] 0.645173201 0.644543525 0.642283038 0.639969327 0.628516385
## [36] 0.628186940 0.626513140 0.620127594 0.612924908 0.611782867
## [41] 0.610529261 0.608295922 0.607204855 0.606368571 0.604484751
## [46] 0.602210886 0.599343346 0.598985385 0.592652431 0.592547535
## [51] 0.591890673 0.589011902 0.588274185 0.588172895 0.577380584
## [56] 0.576667979 0.574786299 0.571448976 0.569190983 0.568764405
## [61] 0.564356512 0.559335279 0.553904193 0.552063966 0.550265784
## [66] 0.548457432 0.543017247 0.540169202 0.537935256 0.537385148
## [71] 0.536123175 0.533628224 0.531774213 0.531450840 0.529569800
## [76] 0.529337240 0.522288772 0.518587072 0.516621185 0.513832654
## [81] 0.511948392 0.509198650 0.505018273 0.502826699 0.500525956
## [86] 0.499181109 0.498793585 0.496303812 0.495943735 0.483048930
## [91] 0.482611228 0.482409132 0.480304751 0.479371858 0.475674823
## [96] 0.472410063 0.472304848 0.471517091 0.470170636 0.469461330
## [101] 0.467639208 0.463559993 0.463405659 0.463344033 0.458188752
## [106] 0.458072667 0.457588742 0.457009422 0.456048125 0.453985667
## [111] 0.451576483 0.448455298 0.442972720 0.441835899 0.440091831
## [116] 0.439005925 0.437317295 0.435201122 0.426670657 0.417571659
## [121] 0.415845807 0.415399987 0.411218524 0.409636658 0.409499608
## [126] 0.408705732 0.406851252 0.406848903 0.405930788 0.403021181
## [131] 0.401202002 0.399245412 0.398993505 0.398293540 0.395530052
## [136] 0.395387062 0.395082937 0.393464349 0.391445041 0.390268913
## [141] 0.388221147 0.387946096 0.386253066 0.386007403 0.383649060
## [146] 0.383273170 0.382238620 0.382031915 0.381881341 0.380752066
## [151] 0.378259085 0.376018563 0.375252590 0.373905888 0.373616949
## [156] 0.371584721 0.369207243 0.367999656 0.367686517 0.364992071
## [161] 0.363899064 0.359723236 0.356370548 0.355412048 0.353814279
## [166] 0.353037069 0.352403073 0.351988080 0.351832404 0.350682820
## [171] 0.348015104 0.347132420 0.346856820 0.346533335 0.345901278
## [176] 0.345843233 0.344661352 0.343763518 0.341060033 0.340384392
## [181] 0.339667087 0.339447488 0.339389826 0.337757971 0.336642403
## [186] 0.335240006 0.334122064 0.332480290 0.330589717 0.328566728
## [191] 0.326792914 0.324362328 0.323556181 0.323482032 0.323317559
## [196] 0.320315694 0.319157319 0.317308057 0.317242415 0.315961777
## [201] 0.315140091 0.313458738 0.311240876 0.310927106 0.310606472
## [206] 0.309955122 0.307994488 0.303763976 0.303236007 0.302160334
## [211] 0.301404312 0.301180562 0.296130290 0.294187329 0.293971525
## [216] 0.292812310 0.290196738 0.288108875 0.285445253 0.282431453
## [221] 0.280722139 0.280198937 0.279150139 0.277844930 0.276199887
## [226] 0.272265624 0.271483391 0.270289234 0.270239266 0.270047417
## [231] 0.266873531 0.266727044 0.266357798 0.263899922 0.255279700
## [236] 0.251904765 0.249827204 0.247839807 0.247755055 0.244709827
## [241] 0.244611438 0.244488842 0.241983188 0.241727348 0.240698054
## [246] 0.240529342 0.240436495 0.240010516 0.238027550 0.237662844
## [251] 0.233907711 0.233760180 0.233419725 0.231350983 0.231185312
## [256] 0.231107140 0.231040162 0.229706958 0.229421152 0.229008468
## [261] 0.228041057 0.223347353 0.223320773 0.221015702 0.219813039
## [266] 0.218079725 0.217013383 0.215377691 0.211498359 0.209217846
## [271] 0.208437105 0.206982074 0.203386403 0.201433594 0.199451787
## [276] 0.197953850 0.197801353 0.197476368 0.197238009 0.195603591
## [281] 0.194379815 0.193261599 0.192949135 0.192935577 0.191938221
## [286] 0.191792955 0.189844710 0.189317109 0.188891175 0.187058399
## [291] 0.185966627 0.185644972 0.185327269 0.185272521 0.184832241
## [296] 0.183411312 0.180018356 0.179519503 0.178926352 0.176595771
## [301] 0.175849976 0.174725748 0.173475293 0.173438482 0.169021795
## [306] 0.164329913 0.163684091 0.163558398 0.160282530 0.159025995
## [311] 0.159021583 0.155931517 0.155086122 0.153093232 0.151937894
## [316] 0.151857716 0.151691516 0.151254473 0.148056107 0.147742454
## [321] 0.147598485 0.146413571 0.145658006 0.143509180 0.142767254
## [326] 0.140891653 0.140488776 0.138385516 0.138054599 0.137212836
## [331] 0.137136333 0.136483152 0.131889688 0.126459930 0.126315187
## [336] 0.125997595 0.124911240 0.122942039 0.122385389 0.121456478
## [341] 0.120888028 0.119214951 0.118896849 0.118353549 0.116936758
## [346] 0.116352925 0.114878429 0.114870581 0.113298971 0.111783899
## [351] 0.110823593 0.110083710 0.109592618 0.109169290 0.108884328
## [356] 0.108463099 0.107433411 0.107157816 0.107042277 0.105247477
## [361] 0.104436716 0.103928756 0.100059566 0.098479913 0.097325066
## [366] 0.094428593 0.091233986 0.087251463 0.086585507 0.083593108
## [371] 0.083101521 0.082854107 0.081669120 0.081412905 0.081380553
## [376] 0.079773645 0.077769330 0.077187840 0.076474499 0.070321590
## [381] 0.069054405 0.065625084 0.065539606 0.065403119 0.064968257
## [386] 0.059795807 0.059341733 0.056874860 0.056111969 0.055719326
## [391] 0.055411024 0.054283134 0.054263065 0.049330734 0.046535241
## [396] 0.045090791 0.044845784 0.044640074 0.036623192 0.033688004
## [401] 0.031645962 0.031067564 0.029514896 0.028954345 0.028120570
## [406] 0.027572887 0.025070533 0.022120724 0.020756312 0.015821488
## [411] 0.013446937 0.012468640 0.011813756 0.010964549 0.005553016
## [416] 0.004774123 0.003141584 -0.003982315 -0.004752782 -0.006512224
## [421] -0.006910454 -0.007534220 -0.010632302 -0.011939872 -0.012369411
## [426] -0.012440809 -0.012497556 -0.018626493 -0.034189099 -0.034266728
## [431] -0.034827224 -0.036745758 -0.038611040 -0.039123591 -0.041308484
## [436] -0.043412188 -0.048015597 -0.050186218 -0.051180312 -0.056920154
## [441] -0.057084644 -0.057245774 -0.059433005 -0.060058924 -0.063671503
## [446] -0.064778594 -0.066480355 -0.072314502 -0.073646768 -0.074490570
## [451] -0.074551019 -0.074935363 -0.078639997 -0.078987439 -0.080637636
## [456] -0.083505510 -0.084777361 -0.097351924 -0.097840272 -0.104092846
## [461] -0.104920752 -0.111054264 -0.111388281 -0.112119026 -0.119474009
## [466] -0.138684143 -0.139462830 -0.143611248 -0.149387518 -0.165823468
## [471] -0.169815610 -0.170311719 -0.171695962 -0.189538684 -0.196173560
## [476] -0.201444724 -0.207169429 -0.207732707 -0.222523929 -0.226585571
## [481] -0.227571057 -0.243313449 -0.256198081 -0.258556973 -0.270734427
## [486] -0.294921384 -0.312724842 -0.327774703 -0.392414221 -0.447158136
## [491] -0.527342399
y.test<- y.test[order(y.test[,"mPS.lamda.min"], decreasing = T),]
library(pheatmap)
library(RColorBrewer)
colfunc<-colorRampPalette(c("springgreen","yellow","red"))
annotation_col = data.frame(
mPS=as.numeric(y.training$mPS.lamda.min),
mPS_Q=factor(y.training$mPS_Q),
mPS_QHL=factor(y.training$mPS_QHL)
)
annotation_col$mPS
## [1] 1.178226583 0.974262535 0.962140793 0.955170687 0.953000659
## [6] 0.866465544 0.863709849 0.821080885 0.805024397 0.803373978
## [11] 0.800112033 0.800088426 0.789026058 0.784576709 0.758187269
## [16] 0.748624185 0.748519927 0.744531149 0.741834435 0.730623952
## [21] 0.730499363 0.728757392 0.726902549 0.715452334 0.693374710
## [26] 0.667811918 0.664971602 0.660187858 0.655550204 0.647482533
## [31] 0.645173201 0.644543525 0.642283038 0.639969327 0.628516385
## [36] 0.628186940 0.626513140 0.620127594 0.612924908 0.611782867
## [41] 0.610529261 0.608295922 0.607204855 0.606368571 0.604484751
## [46] 0.602210886 0.599343346 0.598985385 0.592652431 0.592547535
## [51] 0.591890673 0.589011902 0.588274185 0.588172895 0.577380584
## [56] 0.576667979 0.574786299 0.571448976 0.569190983 0.568764405
## [61] 0.564356512 0.559335279 0.553904193 0.552063966 0.550265784
## [66] 0.548457432 0.543017247 0.540169202 0.537935256 0.537385148
## [71] 0.536123175 0.533628224 0.531774213 0.531450840 0.529569800
## [76] 0.529337240 0.522288772 0.518587072 0.516621185 0.513832654
## [81] 0.511948392 0.509198650 0.505018273 0.502826699 0.500525956
## [86] 0.499181109 0.498793585 0.496303812 0.495943735 0.483048930
## [91] 0.482611228 0.482409132 0.480304751 0.479371858 0.475674823
## [96] 0.472410063 0.472304848 0.471517091 0.470170636 0.469461330
## [101] 0.467639208 0.463559993 0.463405659 0.463344033 0.458188752
## [106] 0.458072667 0.457588742 0.457009422 0.456048125 0.453985667
## [111] 0.451576483 0.448455298 0.442972720 0.441835899 0.440091831
## [116] 0.439005925 0.437317295 0.435201122 0.426670657 0.417571659
## [121] 0.415845807 0.415399987 0.411218524 0.409636658 0.409499608
## [126] 0.408705732 0.406851252 0.406848903 0.405930788 0.403021181
## [131] 0.401202002 0.399245412 0.398993505 0.398293540 0.395530052
## [136] 0.395387062 0.395082937 0.393464349 0.391445041 0.390268913
## [141] 0.388221147 0.387946096 0.386253066 0.386007403 0.383649060
## [146] 0.383273170 0.382238620 0.382031915 0.381881341 0.380752066
## [151] 0.378259085 0.376018563 0.375252590 0.373905888 0.373616949
## [156] 0.371584721 0.369207243 0.367999656 0.367686517 0.364992071
## [161] 0.363899064 0.359723236 0.356370548 0.355412048 0.353814279
## [166] 0.353037069 0.352403073 0.351988080 0.351832404 0.350682820
## [171] 0.348015104 0.347132420 0.346856820 0.346533335 0.345901278
## [176] 0.345843233 0.344661352 0.343763518 0.341060033 0.340384392
## [181] 0.339667087 0.339447488 0.339389826 0.337757971 0.336642403
## [186] 0.335240006 0.334122064 0.332480290 0.330589717 0.328566728
## [191] 0.326792914 0.324362328 0.323556181 0.323482032 0.323317559
## [196] 0.320315694 0.319157319 0.317308057 0.317242415 0.315961777
## [201] 0.315140091 0.313458738 0.311240876 0.310927106 0.310606472
## [206] 0.309955122 0.307994488 0.303763976 0.303236007 0.302160334
## [211] 0.301404312 0.301180562 0.296130290 0.294187329 0.293971525
## [216] 0.292812310 0.290196738 0.288108875 0.285445253 0.282431453
## [221] 0.280722139 0.280198937 0.279150139 0.277844930 0.276199887
## [226] 0.272265624 0.271483391 0.270289234 0.270239266 0.270047417
## [231] 0.266873531 0.266727044 0.266357798 0.263899922 0.255279700
## [236] 0.251904765 0.249827204 0.247839807 0.247755055 0.244709827
## [241] 0.244611438 0.244488842 0.241983188 0.241727348 0.240698054
## [246] 0.240529342 0.240436495 0.240010516 0.238027550 0.237662844
## [251] 0.233907711 0.233760180 0.233419725 0.231350983 0.231185312
## [256] 0.231107140 0.231040162 0.229706958 0.229421152 0.229008468
## [261] 0.228041057 0.223347353 0.223320773 0.221015702 0.219813039
## [266] 0.218079725 0.217013383 0.215377691 0.211498359 0.209217846
## [271] 0.208437105 0.206982074 0.203386403 0.201433594 0.199451787
## [276] 0.197953850 0.197801353 0.197476368 0.197238009 0.195603591
## [281] 0.194379815 0.193261599 0.192949135 0.192935577 0.191938221
## [286] 0.191792955 0.189844710 0.189317109 0.188891175 0.187058399
## [291] 0.185966627 0.185644972 0.185327269 0.185272521 0.184832241
## [296] 0.183411312 0.180018356 0.179519503 0.178926352 0.176595771
## [301] 0.175849976 0.174725748 0.173475293 0.173438482 0.169021795
## [306] 0.164329913 0.163684091 0.163558398 0.160282530 0.159025995
## [311] 0.159021583 0.155931517 0.155086122 0.153093232 0.151937894
## [316] 0.151857716 0.151691516 0.151254473 0.148056107 0.147742454
## [321] 0.147598485 0.146413571 0.145658006 0.143509180 0.142767254
## [326] 0.140891653 0.140488776 0.138385516 0.138054599 0.137212836
## [331] 0.137136333 0.136483152 0.131889688 0.126459930 0.126315187
## [336] 0.125997595 0.124911240 0.122942039 0.122385389 0.121456478
## [341] 0.120888028 0.119214951 0.118896849 0.118353549 0.116936758
## [346] 0.116352925 0.114878429 0.114870581 0.113298971 0.111783899
## [351] 0.110823593 0.110083710 0.109592618 0.109169290 0.108884328
## [356] 0.108463099 0.107433411 0.107157816 0.107042277 0.105247477
## [361] 0.104436716 0.103928756 0.100059566 0.098479913 0.097325066
## [366] 0.094428593 0.091233986 0.087251463 0.086585507 0.083593108
## [371] 0.083101521 0.082854107 0.081669120 0.081412905 0.081380553
## [376] 0.079773645 0.077769330 0.077187840 0.076474499 0.070321590
## [381] 0.069054405 0.065625084 0.065539606 0.065403119 0.064968257
## [386] 0.059795807 0.059341733 0.056874860 0.056111969 0.055719326
## [391] 0.055411024 0.054283134 0.054263065 0.049330734 0.046535241
## [396] 0.045090791 0.044845784 0.044640074 0.036623192 0.033688004
## [401] 0.031645962 0.031067564 0.029514896 0.028954345 0.028120570
## [406] 0.027572887 0.025070533 0.022120724 0.020756312 0.015821488
## [411] 0.013446937 0.012468640 0.011813756 0.010964549 0.005553016
## [416] 0.004774123 0.003141584 -0.003982315 -0.004752782 -0.006512224
## [421] -0.006910454 -0.007534220 -0.010632302 -0.011939872 -0.012369411
## [426] -0.012440809 -0.012497556 -0.018626493 -0.034189099 -0.034266728
## [431] -0.034827224 -0.036745758 -0.038611040 -0.039123591 -0.041308484
## [436] -0.043412188 -0.048015597 -0.050186218 -0.051180312 -0.056920154
## [441] -0.057084644 -0.057245774 -0.059433005 -0.060058924 -0.063671503
## [446] -0.064778594 -0.066480355 -0.072314502 -0.073646768 -0.074490570
## [451] -0.074551019 -0.074935363 -0.078639997 -0.078987439 -0.080637636
## [456] -0.083505510 -0.084777361 -0.097351924 -0.097840272 -0.104092846
## [461] -0.104920752 -0.111054264 -0.111388281 -0.112119026 -0.119474009
## [466] -0.138684143 -0.139462830 -0.143611248 -0.149387518 -0.165823468
## [471] -0.169815610 -0.170311719 -0.171695962 -0.189538684 -0.196173560
## [476] -0.201444724 -0.207169429 -0.207732707 -0.222523929 -0.226585571
## [481] -0.227571057 -0.243313449 -0.256198081 -0.258556973 -0.270734427
## [486] -0.294921384 -0.312724842 -0.327774703 -0.392414221 -0.447158136
## [491] -0.527342399
rownames(annotation_col)<- rownames(y.training)
annotation_col.test = data.frame(
mPS=as.numeric(y.test$mPS.lamda.min),
mPS_Q=factor(y.test$mPS_Q),
mPS_QHL=factor(y.test$mPS_QHL)
)
rownames(annotation_col.test)<- rownames(y.test)
# Specify colors
ann_colors = list(
mPS_QHL= c("Low"="black", "High"="red"),
mPS_Q= c("Q1"="black","Q2"="yellow", "Q3"="orange", "Q4"= "red")
)
dim(y.training)
## [1] 491 23
pheatmap(t(y.training[, 1:x]),
color=colfunc(ncol(t(y.training[, 1:x]))),
scale= "row", clustering_method = "ward.D2",
cluster_cols = F,cluster_rows = F,
annotation_col = annotation_col, annotation_colors = ann_colors,
show_colnames = F,
main= "TrainingData")

y.scale<- scale(y.training[,1:x], center = T, scale = T)
y.scale[1:10, 1:5]
## RHOT1 RPS6KA2 ASAH1
## GSM657645_SO404 1.7212318 3.4002202 2.5028185
## GSM657639_SO395 3.7762678 1.3545430 1.6207530
## GSM657699_SO77 2.3430109 2.6732905 1.3968130
## TCGA-61-1721-01A-01R-0653-01 1.1380233 2.3803221 1.3166308
## TCGA-13-1485-01A-02R-0477-01 0.8882425 1.9549501 1.2587135
## GSM657575_SO21 1.5026774 2.2211549 0.4168726
## GSM461399.CEL.gz 0.4572770 0.7980393 2.4185301
## GSM643035_OC01_ARN0161_s1h1s1_U133p2.CEL.gz -0.4789603 0.5555957 0.1802790
## GSM461389.CEL.gz 2.7899957 -0.4192973 0.4007161
## TCGA-24-1474-01A-01R-0502-01 2.8546261 0.1062374 -0.1119901
## RASA1 EDNRA
## GSM657645_SO404 2.0492443 1.5947094
## GSM657639_SO395 2.2067718 3.1378704
## GSM657699_SO77 2.0407125 4.5438695
## TCGA-61-1721-01A-01R-0653-01 0.8613005 3.1263114
## TCGA-13-1485-01A-02R-0477-01 1.2947314 1.5910556
## GSM657575_SO21 1.8990795 1.2725529
## GSM461399.CEL.gz 0.9325420 2.8066082
## GSM643035_OC01_ARN0161_s1h1s1_U133p2.CEL.gz -0.1843939 2.1022245
## GSM461389.CEL.gz -1.9627141 2.2035564
## TCGA-24-1474-01A-01R-0502-01 1.6215367 0.6559337
y.scale_1<- ifelse(y.scale>=2,2,
ifelse(y.scale<= -2, -2, y.scale))
pheatmap(t(y.scale_1),
color=colfunc(ncol(t(y.scale_1))),
scale= "none", clustering_method = "ward.D2",
cluster_cols = F,cluster_rows = F,
annotation_col = annotation_col, annotation_colors = ann_colors,
show_colnames = F,
main="TrainingData"
)

y.scale.test<- scale(y.test[,1:x.test], center = T, scale = T)
y.scale_1.test<- ifelse(y.scale.test>=2,2,
ifelse(y.scale.test<= -2, -2, y.scale.test))
pheatmap(t(y.scale_1.test),
color=colfunc(ncol(t(y.scale_1.test))),
scale= "none", clustering_method = "ward.D2",
cluster_cols = F,cluster_rows = F,
annotation_col = annotation_col.test, annotation_colors = ann_colors,
show_colnames = F,
main="TestData"
)

####################################
#MDS plot with 20 predictor genes#
###################################
par(mfrow= c(1,2))
set.seed(123)
plotMDS(t(y.training[, 1:x]), col= colfunc(4)[y.training$mPS_Q], pch=19, main= "MDS_TrainingData")
legend("topright", legend= levels(y.training$mPS_Q), col=colfunc(4), pch=19 )
set.seed(123)
#testdata
plotMDS(t(y.test[, 1:x.test]), col= colfunc(4)[y.test$mPS_Q], pch=19, main= "MDS_TestData")
legend("topright", legend= levels(y.test$mPS_Q), col=colfunc(4), pch=19 )

#############################################
#Clinical data association with mPS score#
############################################
library(TCGAbiolinks)
## Warning: package 'TCGAbiolinks' was built under R version 4.1.1
query <- GDCquery(project = "TCGA-OV",
data.category = "Clinical",
data.type = "Clinical Supplement",
data.format = "BCR Biotab")
## --------------------------------------
## o GDCquery: Searching in GDC database
## --------------------------------------
## Genome of reference: hg38
## --------------------------------------------
## oo Accessing GDC. This might take a while...
## --------------------------------------------
## ooo Project: TCGA-OV
## --------------------
## oo Filtering results
## --------------------
## ooo By data.format
## ooo By data.type
## ----------------
## oo Checking data
## ----------------
## ooo Check if there are duplicated cases
## Warning: There are more than one file for the same case. Please verify query results. You can use the command View(getResults(query)) in rstudio
## ooo Check if there results for the query
## -------------------
## o Preparing output
## -------------------
GDCdownload(query)
## Downloading data for project TCGA-OV
## Of the 7 files for download 7 already exist.
## All samples have been already downloaded
clinical.BCRtab.all <- GDCprepare(query)
##
|
| | 0%
|
|========== | 14%
|
|==================== | 29%
|
|============================== | 43%
|
|======================================== | 57%
|
|================================================== | 71%
|
|============================================================ | 86%
|
|======================================================================| 100%
tibble::tibble(sort(colnames(clinical.BCRtab.all$clinical_patient_ov)))
## # A tibble: 57 × 1
## `sort(colnames(clinical.BCRtab.all$clinical_patient_ov))`
## <chr>
## 1 age_at_initial_pathologic_diagnosis
## 2 anatomic_neoplasm_subdivision
## 3 bcr_patient_barcode
## 4 bcr_patient_uuid
## 5 birth_days_to
## 6 clinical_M
## 7 clinical_N
## 8 clinical_stage
## 9 clinical_T
## 10 days_to_initial_pathologic_diagnosis
## # … with 47 more rows
yy<- clinical.BCRtab.all$clinical_patient_ov
m<- match(substr(rownames(OV.Surv_Score), 1,12), yy$bcr_patient_barcode)
OV.Surv_Score$age_at_diagnosis <- as.numeric(yy$age_at_initial_pathologic_diagnosis[m])
OV.Surv_Score$age_at_diagnosis_HvsL<-factor(ifelse(OV.Surv_Score$age_at_diagnosis>=median(OV.Surv_Score$age_at_diagnosis, na.rm = T), "High", "Low"))
OV.Surv_Score$tumor_grade<- factor(yy$tumor_grade[m])
OV.Surv_Score$tumor_grade<- factor(yy$tumor_grade[m], levels= c("G1", "G2", "G3", "G4", "GB", "GX"))
OV.Surv_Score$tumor_grade_numeric<- as.numeric(OV.Surv_Score$tumor_grade)
OV.Surv_Score$tumor_grade_numeric<- as.numeric(factor(OV.Surv_Score$tumor_grade_numeric,levels= 1:4))
OV.Surv_Score$residual_disease_largest_nodule<- factor(yy$residual_disease_largest_nodule[m], levels=
c("No Macroscopic disease", "1-10 mm", "11-20 mm", ">20 mm"))
OV.Surv_Score$residual_disease_largest_nodule_numeric<- as.numeric(OV.Surv_Score$residual_disease_largest_nodule)
OV.Surv_Score$ecog_score<- factor(yy$ecog_score[m])
OV.Surv_Score$ecog_score<- factor(OV.Surv_Score$ecog_score, levels= c(0:3))
OV.Surv_Score$karnofsy_score<- factor(yy$karnofsky_score[m])
OV.Surv_Score$karnofsy_score<- factor(OV.Surv_Score$karnofsy_score, levels= c(40,60,80,100))
OV.Surv_Score$clinical_stage<- factor(yy$clinical_stage[m])
levels(OV.Surv_Score$clinical_stage)<- gsub("Stage ", "",levels(OV.Surv_Score$clinical_stage))
OV.Surv_Score$clinical_stage<- factor(OV.Surv_Score$clinical_stage,
levels= c("IA", "IB", "IC", "IIA", "IIB", "IIC", "IIIA", "IIIB", "IIIC","IV") )
OV.Surv_Score$clinical_stage_numeric<- as.numeric(OV.Surv_Score$clinical_stage)
OV.Surv_Score$lymphovascular_invasion_indicator<- factor(yy$lymphovascular_invasion_indicator[m], levels= c("NO", "YES"))
OV.Surv_Score$ethnicity<- factor(yy$ethnicity[m])
levels(OV.Surv_Score$ethnicity)<- c(NA, "HispanicORLatino", "NotHispanicORLatino")
colnames(colData(TCGA.OV))
## [1] "barcode" "patient"
## [3] "sample" "shortLetterCode"
## [5] "definition" "sample_submitter_id"
## [7] "sample_type_id" "sample_id"
## [9] "sample_type" "state"
## [11] "intermediate_dimension" "pathology_report_uuid"
## [13] "submitter_id" "shortest_dimension"
## [15] "longest_dimension" "is_ffpe"
## [17] "tissue_type" "synchronous_malignancy"
## [19] "tumor_stage" "days_to_diagnosis"
## [21] "treatments" "last_known_disease_status"
## [23] "tissue_or_organ_of_origin" "days_to_last_follow_up"
## [25] "primary_diagnosis" "age_at_diagnosis"
## [27] "prior_malignancy" "year_of_diagnosis"
## [29] "prior_treatment" "morphology"
## [31] "classification_of_tumor" "diagnosis_id"
## [33] "icd_10_code" "site_of_resection_or_biopsy"
## [35] "tumor_grade" "progression_or_recurrence"
## [37] "figo_stage" "alcohol_history"
## [39] "exposure_id" "gender"
## [41] "ethnicity" "vital_status"
## [43] "race" "age_at_index"
## [45] "days_to_birth" "year_of_birth"
## [47] "demographic_id" "year_of_death"
## [49] "days_to_death" "bcr_patient_barcode"
## [51] "primary_site" "disease_type"
## [53] "project_id" "releasable"
## [55] "name" "released"
## [57] "sample.aux" "time"
## [59] "description"
m<- match(rownames(OV.Surv_Score),TCGA.OV$barcode)
OV.Surv_Score$Figo_stage<- factor(TCGA.OV$figo_stage[m])
OV.Surv_Score$age_at_diagnosis_in_days<- TCGA.OV$age_at_diagnosis[m]
OV.Surv_Score$age_at_diagnosis_in_yrs<- OV.Surv_Score$age_at_diagnosis_in_days/365
##########################################################
#Cox regression hazrd model on clinical parameters and mPS#
###########################################################
OV.Surv_Score$age_at_diagnosis_HvsL <- factor(OV.Surv_Score$age_at_diagnosis_HvsL,levels= c("Low", "High"))
covariates <- c("age_at_diagnosis_HvsL", "clinical_stage_numeric", "tumor_grade_numeric", "ethnicity","residual_disease_largest_nodule_numeric", "lamda.min")
res.cox <- coxph(Surv(OV.Surv_Score$time, OV.Surv_Score$vital_status=="Dead") ~ clinical_stage_numeric, data =OV.Surv_Score)
summary(res.cox)
## Call:
## coxph(formula = Surv(OV.Surv_Score$time, OV.Surv_Score$vital_status ==
## "Dead") ~ clinical_stage_numeric, data = OV.Surv_Score)
##
## n= 529, number of events= 333
## (453 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## clinical_stage_numeric 0.16559 1.18008 0.05776 2.867 0.00415 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## clinical_stage_numeric 1.18 0.8474 1.054 1.322
##
## Concordance= 0.557 (se = 0.014 )
## Likelihood ratio test= 10.1 on 1 df, p=0.001
## Wald test = 8.22 on 1 df, p=0.004
## Score (logrank) test = 8.33 on 1 df, p=0.004
univ_formulas <- sapply(covariates,
function(x) as.formula(paste('Surv(OV.Surv_Score$time, OV.Surv_Score$vital_status=="Dead")~', x)))
univ_models <- lapply( univ_formulas, function(x){coxph(x, data = OV.Surv_Score)})
# Extract data
univ_results <- lapply(univ_models,
function(x){
x <- summary(x)
p.value<-signif(x$waldtest["pvalue"], digits=2)
wald.test<-signif(x$waldtest["test"], digits=2)
beta<-signif(x$coefficients[1], digits=2);#coeficient beta
HR <-signif(x$coefficients[2], digits=2);#exp(beta)
HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
HR <- paste0(HR, " (",
HR.confint.lower, "-", HR.confint.upper, ")")
res<-c(beta, HR, wald.test, p.value)
names(res)<-c("beta", "HR (95% CI for HR)", "wald.test",
"p.value")
return(res)
#return(exp(cbind(coef(x),confint(x))))
})
res <- t(as.data.frame(univ_results, check.names = FALSE))
as.data.frame(res) # age_at_diagnosis, clinical_stage, residual_disease, lamda.min are significant
## beta HR (95% CI for HR) wald.test
## age_at_diagnosis_HvsL 0.32 1.4 (1.1-1.7) 8.5
## clinical_stage_numeric 0.17 1.2 (1.1-1.3) 8.2
## tumor_grade_numeric 0.21 1.2 (0.93-1.6) 2.1
## ethnicity -0.1 0.9 (0.4-2) 0.06
## residual_disease_largest_nodule_numeric 0.28 1.3 (1.2-1.5) 25
## lamda.min 1.9 7 (5.1-9.7) 140
## p.value
## age_at_diagnosis_HvsL 0.0035
## clinical_stage_numeric 0.0041
## tumor_grade_numeric 0.15
## ethnicity 0.81
## residual_disease_largest_nodule_numeric 4.5e-07
## lamda.min 3.5e-32
#ethnicity, tumor_grade doesn't play role.
#write.csv(as.data.frame(res), file= "Univariates_clinicalparameters.csv")
#Multivariate
res.cox <- coxph(Surv(OV.Surv_Score$time, OV.Surv_Score$vital_status=="Dead") ~ age_at_diagnosis_HvsL+clinical_stage_numeric+residual_disease_largest_nodule_numeric+lamda.min , data =OV.Surv_Score)
summary(res.cox)
## Call:
## coxph(formula = Surv(OV.Surv_Score$time, OV.Surv_Score$vital_status ==
## "Dead") ~ age_at_diagnosis_HvsL + clinical_stage_numeric +
## residual_disease_largest_nodule_numeric + lamda.min, data = OV.Surv_Score)
##
## n= 473, number of events= 303
## (509 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z
## age_at_diagnosis_HvsLHigh 0.20694 1.22991 0.11829 1.749
## clinical_stage_numeric 0.09805 1.10302 0.06332 1.549
## residual_disease_largest_nodule_numeric 0.23446 1.26423 0.05875 3.991
## lamda.min 1.81531 6.14300 0.26514 6.847
## Pr(>|z|)
## age_at_diagnosis_HvsLHigh 0.0802 .
## clinical_stage_numeric 0.1215
## residual_disease_largest_nodule_numeric 6.58e-05 ***
## lamda.min 7.56e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95
## age_at_diagnosis_HvsLHigh 1.230 0.8131 0.9754
## clinical_stage_numeric 1.103 0.9066 0.9743
## residual_disease_largest_nodule_numeric 1.264 0.7910 1.1267
## lamda.min 6.143 0.1628 3.6534
## upper .95
## age_at_diagnosis_HvsLHigh 1.551
## clinical_stage_numeric 1.249
## residual_disease_largest_nodule_numeric 1.419
## lamda.min 10.329
##
## Concordance= 0.652 (se = 0.018 )
## Likelihood ratio test= 81.59 on 4 df, p=<2e-16
## Wald test = 77.63 on 4 df, p=6e-16
## Score (logrank) test = 79 on 4 df, p=3e-16
ggforest(res.cox, data= OV.Surv_Score) # showing summary of results

combined_counts<-as.matrix(OV.Surv_Score[,c("lamda.min","age_at_diagnosis", "residual_disease_largest_nodule_numeric")])
combined_counts<- combined_counts[complete.cases(combined_counts),]
head(combined_counts)[, 1:3]
## lamda.min age_at_diagnosis
## TCGA-29-1692-01B-01R-0808-01 0.07776236 58
## TCGA-13-1488-01A-01R-0477-01 0.09313776 59
## TCGA-13-1483-01A-01R-0477-01 0.25075725 61
## TCGA-24-2024-01A-02R-0668-01 0.10741438 72
## TCGA-13-0908-01B-01R-0453-01 0.23766284 58
## TCGA-36-1575-01A-01R-0538-01 0.50501827 83
## residual_disease_largest_nodule_numeric
## TCGA-29-1692-01B-01R-0808-01 3
## TCGA-13-1488-01A-01R-0477-01 3
## TCGA-13-1483-01A-01R-0477-01 4
## TCGA-24-2024-01A-02R-0668-01 2
## TCGA-13-0908-01B-01R-0453-01 2
## TCGA-36-1575-01A-01R-0538-01 1
combined_surv<- OV.Surv_Score[rownames(combined_counts),]
cv_glmnet_cox_Combined<-cv.glmnet(x= combined_counts, y= Surv(combined_surv$time, combined_surv$vital_status=="Dead"), family= "cox", nfolds =100 )
plot(cv_glmnet_cox_Combined)
plot(cv_glmnet_cox_Combined$glmnet.fit, s= cv_glmnet_cox_Combined$lambda.min)

mPS.Score_Combined<- predict(cv_glmnet_cox_Combined,s=c(0,cv_glmnet_cox_Combined$lambda.min, cv_glmnet_cox_Combined$lambda.1se),
newx = combined_counts, type= "link")
colnames(mPS.Score_Combined)<- c("lamda.all", "lamda.min", "lamda.1se")
mPS.Score_Combined<- mPS.Score_Combined[, 2:3]
colnames(mPS.Score_Combined)<- c("mPS+age+RD", "mPS+age+RD.1se")
coefvalues<-coefficients(cv_glmnet_cox_Combined, s= cv_glmnet_cox_Combined$lambda.min)
coefvalues<- as.matrix(coefvalues)
coefvalues<-as.data.frame(coefvalues[coefvalues[,1]!=0,])
colnames(coefvalues)<- "mPS+age+RD"
#write.csv(coefvalues, file= "LASSO_coef.csv")
m<-match(rownames(combined_surv),rownames(mPS.Score_Combined)) #perfect match
m
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## [307] 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
## [325] 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## [343] 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## [361] 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## [379] 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## [397] 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
## [415] 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
## [433] 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
## [451] 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
## [469] 469 470 471 472 473 474 475
combined_surv<- cbind.data.frame(combined_surv,mPS.Score_Combined)
#lamda.min_TCGA
OV_pred<-survivalROC(Stime=combined_surv$time,
status= as.numeric(combined_surv$vital_status=="Dead"),
marker=combined_surv$lamda.min,
predict.time = 5*365, method= "KM"
)
OV_pred_combined<- list(
"mPS"=OV_pred
)
# adding age
OV_pred<-survivalROC(Stime=combined_surv$time,
status= as.numeric(combined_surv$vital_status=="Dead"),
marker=combined_surv$age_at_diagnosis,
predict.time = 5*365, method= "KM"
)
OV_pred_combined<- list(
"mPS"=OV_pred_combined$mPS,
age= OV_pred
)
# Adding residual disesase(RD)
OV_pred<-survivalROC(Stime=combined_surv$time,
status= as.numeric(combined_surv$vital_status=="Dead"),
marker=combined_surv$residual_disease_largest_nodule_numeric,
predict.time = 5*365, method= "KM"
)
OV_pred_combined<- list(
mPS=OV_pred_combined$mPS,
age=OV_pred_combined$age,
Residual= OV_pred
)
# Adding all parameters combined
OV_pred<-survivalROC(Stime=combined_surv$time,
status= as.numeric(combined_surv$vital_status=="Dead"),
marker=combined_surv$`mPS+age+RD`,
predict.time = 5*365, method= "KM"
)
OV_pred_combined<- list(
mPS=OV_pred_combined$mPS,
age=OV_pred_combined$age,
RD=OV_pred_combined$Residual,
"mPS+age+RD"= OV_pred
)
par(mfrow=c(1,1))
plot(OV_pred_combined[[1]]$FP, OV_pred_combined[[1]]$TP, type="l", col= "black" ,xlim=c(0,1), ylim=c(0,1),
xlab=paste("FP"),
ylab="TP",main="AUC, Method = KM \n Year =5, Sample= TCGA")
abline(0,1)
my.colors<- c("black", "blue", "green", "red")
for (i in 2:4) {lines(OV_pred_combined[[i]]$FP, OV_pred_combined[[i]]$TP, type="l", col= my.colors[i] ,xlim=c(0,1), ylim=c(0,1))
}
txt<- c(round(OV_pred_combined[[1]]$AUC,2),round(OV_pred_combined[[2]]$AUC,2),round(OV_pred_combined[[3]]$AUC,2),round(OV_pred_combined[[4]]$AUC,2)
)
legend("bottomright", legend= c(names(OV_pred_combined),txt), col= my.colors, lty=1, ncol=2, cex=0.75, title=paste0(" ", "AUC"),
y.intersp=0.75,x.intersp =0)

#SDE genes between high vs low prognostic score
OV_TCGA_GSE.Counts_All<- y.tcga.gse_cor_batch
head(OV_TCGA_GSE.Counts_All)[, 1:5]
## TCGA-29-1692-01B-01R-0808-01 TCGA-13-1488-01A-01R-0477-01
## EEF1A1 13.03324 13.25729
## RPS3A 12.92130 12.53616
## RPS4X 12.27286 12.24965
## RPS27 12.19733 12.09124
## RPL39 12.75188 12.50652
## RPS23 12.55604 12.43668
## TCGA-13-1483-01A-01R-0477-01 TCGA-24-2024-01A-02R-0668-01
## EEF1A1 12.98532 13.15260
## RPS3A 12.81662 12.92130
## RPS4X 12.87456 12.65051
## RPS27 12.22907 12.47443
## RPL39 12.75188 12.53887
## RPS23 12.34817 12.34817
## TCGA-13-0908-01B-01R-0453-01
## EEF1A1 12.94473
## RPS3A 12.69725
## RPS4X 12.42533
## RPS27 12.29580
## RPL39 12.75188
## RPS23 12.05903
dim(OV_TCGA_GSE.Counts_All)
## [1] 10225 1016
OV_TCGA_GSE.Counts_All<- OV_TCGA_GSE.Counts_All[,rownames(OV.Surv_Score)]
design<- model.matrix(~OV.Surv_Score$batch+OV.Surv_Score$lamda.min_QHL, data=OV.Surv_Score)
dim(design)
## [1] 982 7
colnames(design)
## [1] "(Intercept)" "OV.Surv_Score$batchGSE14764"
## [3] "OV.Surv_Score$batchGSE18520" "OV.Surv_Score$batchGSE26193"
## [5] "OV.Surv_Score$batchGSE26712" "OV.Surv_Score$batchGSE63855"
## [7] "OV.Surv_Score$lamda.min_QHLHigh"
OV_TCGA_GSE.Counts_All<- OV_TCGA_GSE.Counts_All[, rownames(design)]
fit<- lmFit(OV_TCGA_GSE.Counts_All, design)
fit.ebayes<- eBayes(fit)
summary(decideTests(fit.ebayes))
## (Intercept) OV.Surv_Score$batchGSE14764 OV.Surv_Score$batchGSE18520
## Down 0 0 290
## NotSig 0 10225 9882
## Up 10225 0 53
## OV.Surv_Score$batchGSE26193 OV.Surv_Score$batchGSE26712
## Down 0 11
## NotSig 10225 10192
## Up 0 22
## OV.Surv_Score$batchGSE63855 OV.Surv_Score$lamda.min_QHLHigh
## Down 0 2453
## NotSig 10225 5784
## Up 0 1988
colnames(fit.ebayes)
## [1] "(Intercept)" "OV.Surv_Score$batchGSE14764"
## [3] "OV.Surv_Score$batchGSE18520" "OV.Surv_Score$batchGSE26193"
## [5] "OV.Surv_Score$batchGSE26712" "OV.Surv_Score$batchGSE63855"
## [7] "OV.Surv_Score$lamda.min_QHLHigh"
topTable(fit.ebayes, coef=7)
## logFC AveExpr t P.Value adj.P.Val B
## DCHS1 0.5050088 5.661414 14.40849 7.957704e-43 8.136752e-39 86.28429
## PDGFRB 0.4682045 6.071064 12.60355 7.369996e-34 3.767910e-30 65.97661
## FSTL1 0.6923946 8.919050 12.24437 3.564924e-32 1.215045e-28 62.16388
## RPS6KA2 0.4686437 5.569783 12.03339 3.350834e-31 8.565570e-28 59.96194
## BTN3A3 -0.5787439 5.790756 -11.92691 1.027109e-30 1.998127e-27 58.86132
## PCOLCE 0.7639884 7.262415 11.91428 1.172495e-30 1.998127e-27 58.73124
## TCF4 0.6098881 7.725724 11.83212 2.766664e-30 4.041306e-27 57.88776
## VAT1 0.3891708 8.367604 11.77586 4.968365e-30 6.350191e-27 57.31260
## GJB1 -0.4791447 6.000926 -11.75952 5.886913e-30 6.669130e-27 57.14594
## ZEB1 0.5629934 5.032878 11.74964 6.522376e-30 6.669130e-27 57.04524
m<- match(rownames(fit.ebayes), egSymbol$symbol)
fit.ebayes[["geneinfo"]]<- list(
gene_symbol=rownames(fit.ebayes),
gene_id=egSymbol$gene_id[m]
)
results<-decideTests(fit.ebayes, coef=7)
summary(results[,7])
## OV.Surv_Score$lamda.min_QHLHigh
## Down 2453
## NotSig 5784
## Up 1988
n=summary(results)[1,7]+summary(results)[3,7]
results<- data.frame(results[,7])
m<- match(rownames(results),egSymbol$symbol)
results$gene_id<-egSymbol$gene_id[m]
topDEgenes<-topTable(fit.ebayes,coef=7,number=n)
m<- match(rownames(topDEgenes), egSymbol$symbol)
topDEgenes$gene_id<- egSymbol$gene_id[m]
#write.csv(topDEgenes, file= "topDEgenes_HighvsLowPrognosticScore2.csv")
par(mfrow=c(1,1))
plotMD(fit.ebayes, coef=7, status=decideTests(fit.ebayes[,7], lfc= log2(1.2)))
abline(h=log2(1.2), col= "green")
abline(h=-log2(1.2), col= "green")

par(mfrow=c(2,5))
for (i in 1:10) {
plot(OV_TCGA_GSE.Counts_All[rownames(topDEgenes)[i], ]~OV.Surv_Score$lamda.min_QHL,
xlab= "", ylab= "log2.expression", main= rownames(topDEgenes)[i])
}

go<-goana.MArrayLM(fit.ebayes, geneid = fit.ebayes$geneinfo$gene_id, species= "Hs")
topGO(go, ontology = "BP", sort="down")
## Term Ont N Up Down
## GO:0034660 ncRNA metabolic process BP 261 18 131
## GO:0140053 mitochondrial gene expression BP 92 4 61
## GO:0006281 DNA repair BP 360 30 159
## GO:0034470 ncRNA processing BP 224 16 112
## GO:0006974 cellular response to DNA damage stimulus BP 577 72 222
## GO:0032543 mitochondrial translation BP 77 3 51
## GO:0006259 DNA metabolic process BP 605 78 228
## GO:0034641 cellular nitrogen compound metabolic process BP 3938 724 1106
## GO:0045333 cellular respiration BP 135 6 73
## GO:0006396 RNA processing BP 568 74 213
## GO:0006399 tRNA metabolic process BP 80 1 49
## GO:0022613 ribonucleoprotein complex biogenesis BP 283 22 120
## GO:0070125 mitochondrial translational elongation BP 55 2 37
## GO:0022904 respiratory electron transport chain BP 82 3 47
## GO:0042254 ribosome biogenesis BP 191 16 86
## GO:0006415 translational termination BP 65 2 40
## GO:0044786 cell cycle DNA replication BP 51 2 34
## GO:0006414 translational elongation BP 78 2 45
## GO:0070126 mitochondrial translational termination BP 54 2 35
## GO:0006261 DNA-dependent DNA replication BP 105 6 55
## P.Up P.Down
## GO:0034660 1.0000000 1.442977e-20
## GO:0140053 0.9999956 7.262668e-18
## GO:0006281 1.0000000 9.732623e-18
## GO:0034470 1.0000000 1.366295e-17
## GO:0006974 0.9999981 1.035702e-15
## GO:0032543 0.9999894 3.994494e-15
## GO:0006259 0.9999956 6.135721e-15
## GO:0034641 0.9849578 1.408078e-14
## GO:0045333 0.9999999 3.770885e-14
## GO:0006396 0.9999873 9.390683e-14
## GO:0006399 1.0000000 1.188029e-12
## GO:0022613 1.0000000 3.504547e-12
## GO:0070125 0.9999050 1.202226e-11
## GO:0022904 0.9999960 9.281457e-11
## GO:0042254 0.9999941 1.087645e-10
## GO:0006415 0.9999874 1.148338e-10
## GO:0044786 0.9997887 1.209002e-10
## GO:0006414 0.9999991 1.728442e-10
## GO:0070126 0.9998839 2.108727e-10
## GO:0006261 0.9999874 2.694761e-10
topGO(go, ontology = "BP", sort="up")
## Term Ont N Up Down
## GO:0009653 anatomical structure morphogenesis BP 1937 594 301
## GO:0072359 circulatory system development BP 812 300 118
## GO:0006928 movement of cell or subcellular component BP 1493 460 238
## GO:0001944 vasculature development BP 558 221 86
## GO:0001568 blood vessel development BP 537 214 82
## GO:0048856 anatomical structure development BP 3912 981 724
## GO:0048731 system development BP 3309 851 588
## GO:0007275 multicellular organism development BP 3629 917 663
## GO:0035295 tube development BP 790 278 124
## GO:0035239 tube morphogenesis BP 644 239 103
## GO:0040011 locomotion BP 1310 405 209
## GO:0030198 extracellular matrix organization BP 308 141 33
## GO:0043062 extracellular structure organization BP 309 141 33
## GO:0032502 developmental process BP 4218 1030 821
## GO:0045229 external encapsulating structure organization BP 310 141 33
## GO:0048514 blood vessel morphogenesis BP 481 190 72
## GO:0016477 cell migration BP 1097 349 169
## GO:0050793 regulation of developmental process BP 1708 487 323
## GO:0048870 cell motility BP 1177 362 191
## GO:0051674 localization of cell BP 1177 362 191
## P.Up P.Down
## GO:0009653 2.423355e-40 1.0000000
## GO:0072359 3.072481e-34 1.0000000
## GO:0006928 1.798259e-30 1.0000000
## GO:0001944 5.858553e-30 0.9999999
## GO:0001568 1.907934e-29 0.9999999
## GO:0048856 2.176890e-29 1.0000000
## GO:0048731 7.195580e-28 1.0000000
## GO:0007275 7.716825e-28 1.0000000
## GO:0035295 1.402058e-27 1.0000000
## GO:0035239 1.942823e-27 0.9999999
## GO:0040011 8.347261e-27 1.0000000
## GO:0030198 1.419141e-26 1.0000000
## GO:0043062 2.128803e-26 1.0000000
## GO:0032502 2.214493e-26 1.0000000
## GO:0045229 3.184664e-26 1.0000000
## GO:0048514 1.335427e-25 0.9999998
## GO:0016477 2.470658e-25 1.0000000
## GO:0050793 1.183719e-23 1.0000000
## GO:0048870 2.657739e-23 1.0000000
## GO:0051674 2.657739e-23 1.0000000
#write.csv(topGO(go, ontology = "BP", number = 50L), file= "topGO_HighvsLowRiskGrop.csv")
#write.csv(topGO(go, ontology = "BP", sort="up", number = 20L), file= "topGO_HighvsLowRiskGrop_sortUP.csv")
kegg<- kegga.MArrayLM(fit.ebayes, geneid = fit.ebayes$geneinfo$gene_id, species= "Hs")
topKEGG(kegg) #TGF-B, PI3K-Akt,SignalingPathys_PluropotencyStemCells,FocalAdhesion-Allup
## Pathway N Up Down
## path:hsa04510 Focal adhesion 176 79 16
## path:hsa04612 Antigen processing and presentation 58 2 37
## path:hsa01100 Metabolic pathways 1076 139 340
## path:hsa04360 Axon guidance 149 60 17
## path:hsa05206 MicroRNAs in cancer 145 58 20
## path:hsa03030 DNA replication 32 0 23
## path:hsa00190 Oxidative phosphorylation 88 6 45
## path:hsa05200 Pathways in cancer 455 134 87
## path:hsa05330 Allograft rejection 32 1 22
## path:hsa01200 Carbon metabolism 96 4 46
## path:hsa05205 Proteoglycans in cancer 181 64 31
## path:hsa04932 Non-alcoholic fatty liver disease 135 22 59
## path:hsa04512 ECM-receptor interaction 76 34 4
## path:hsa05208 Chemical carcinogenesis - reactive oxygen species 165 29 68
## path:hsa05332 Graft-versus-host disease 32 2 21
## path:hsa05415 Diabetic cardiomyopathy 154 29 64
## path:hsa04520 Adherens junction 63 29 7
## path:hsa04010 MAPK signaling pathway 259 82 35
## path:hsa05414 Dilated cardiomyopathy 83 35 5
## path:hsa04151 PI3K-Akt signaling pathway 296 91 40
## P.Up P.Down
## path:hsa04510 8.679302e-15 9.999999e-01
## path:hsa04612 9.999480e-01 1.272035e-10
## path:hsa01100 1.000000e+00 1.124493e-09
## path:hsa04360 2.917801e-09 9.999702e-01
## path:hsa05206 7.172715e-09 9.992293e-01
## path:hsa03030 1.000000e+00 1.392580e-08
## path:hsa00190 9.997822e-01 2.956985e-08
## path:hsa05200 1.011437e-07 9.953293e-01
## path:hsa05330 9.990221e-01 1.044251e-07
## path:hsa01200 9.999979e-01 2.570191e-07
## path:hsa05205 3.022980e-07 9.904237e-01
## path:hsa04932 8.512767e-01 3.263393e-07
## path:hsa04512 4.239156e-07 9.999979e-01
## path:hsa05208 7.580725e-01 6.254447e-07
## path:hsa05332 9.914412e-01 6.821026e-07
## path:hsa05415 6.090733e-01 9.369385e-07
## path:hsa04520 1.445486e-06 9.970510e-01
## path:hsa04010 1.482491e-06 9.999923e-01
## path:hsa05414 1.599650e-06 9.999974e-01
## path:hsa04151 1.651256e-06 9.999981e-01
#CellCycle, DNAreplication,BaseExcisionrepair-Alldown
#write.csv(topKEGG(kegg, number = 50L), file= "topKEGG_HighvsLowRiskGrop.csv")
###############################################
#MANUALLY assigning KEGGPATWAYS#
#BARCODE PLOT#
###############################################
keggpathways<- getKEGGPathwayNames("hsa")
x<- getGeneKEGGLinks("hsa", convert = T)
par(mfrow= c(2,3))
#1. TGF-B
x.1<- x[x$PathwayID=="path:hsa04350",]
idfound<-topDEgenes$gene_id %in% x.1$GeneID
table(idfound)
## idfound
## FALSE TRUE
## 4390 51
barcodeplot(statistics = topDEgenes$logFC, index= idfound,
xlab= "logFC", main="hsa04350:TGF-beta signaling pathway",
cex.main=1) # p.up of TGF-b
#2.PI3K-Akt signaling
x.1<- x[x$PathwayID=="path:hsa04151",]
idfound<- topDEgenes$gene_id %in% x.1$GeneID
table(idfound)
## idfound
## FALSE TRUE
## 4310 131
barcodeplot(statistics = topDEgenes$logFC, index= idfound,
xlab= "logFC", main="hsa04151:PI3K-AktSignalingPathway",
cex.main=1) #p.up more significant
#3.path:hsa04550 Signaling pathways regulating pluripotency of stem cells
x.1<- x[x$PathwayID=="path:hsa04550",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)
## idfound
## FALSE TRUE
## 4370 71
barcodeplot(statistics = topDEgenes$logFC, index= idfound,
xlab= "logFC", main="hsa04550:StemCellGenes",
cex.main=1) # p.up
#4.path:hsa04110 Cell cycle
x.1<- x[x$PathwayID=="path:hsa04110",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)
## idfound
## FALSE TRUE
## 4384 57
barcodeplot(statistics = topDEgenes$logFC, index= idfound,
xlab= "logFC", main="hsa04110:Cell cycle",
cex.main=1) # p.down
#5.path:hsa03030_DNA replication
x.1<- x[x$PathwayID=="path:hsa03030",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)
## idfound
## FALSE TRUE
## 4418 23
barcodeplot(statistics = topDEgenes$logFC, index= idfound,
xlab= "logFC", main="hsa03030:DNA replication",
cex.main=1) # p.down
#6.path:hsa03410_Base excision repair
x.1<- x[x$PathwayID=="path:hsa03410",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)
## idfound
## FALSE TRUE
## 4421 20
barcodeplot(statistics = topDEgenes$logFC, index= idfound,
xlab= "logFC", main="hsa03410:Base excision repair",
cex.main=1) # p.down

#7.path:hsa04620_TLR pathways
x.1<- x[x$PathwayID=="path:hsa04620",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)
## idfound
## FALSE TRUE
## 4399 42
barcodeplot(statistics = topDEgenes$logFC, index= idfound,
xlab= "logFC", main="hsa04620:TLR pathways",
cex.main=1) # p.down
#8.path:hsa04660_TCR Signaling pathways
x.1<- x[x$PathwayID=="path:hsa04660",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)
## idfound
## FALSE TRUE
## 4394 47
barcodeplot(statistics = topDEgenes$logFC, index= idfound,
xlab= "logFC", main="hsa04660:TCR Signaling pathways",
cex.main=1) # no.change
#9.path:hsa04657_IL-17Signaling pathways
x.1<- x[x$PathwayID=="path:hsa04657",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)
## idfound
## FALSE TRUE
## 4405 36
barcodeplot(statistics = topDEgenes$logFC, index= idfound,
xlab= "logFC", main="hsa04657:IL-17 Signaling pathways",
cex.main=1)
#10.path:hsa04218_Cellular senesence Signaling pathways
x.1<- x[x$PathwayID=="path:hsa04218",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)
## idfound
## FALSE TRUE
## 4372 69
barcodeplot(statistics = topDEgenes$logFC, index= idfound,
xlab= "logFC", main="hsa04218:CellularSenesence Signaling pathways",
cex.main=1)
#11.path:hsa04215_ApoptosisSignaling pathways
x.1<- x[x$PathwayID=="path:hsa04215",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)
## idfound
## FALSE TRUE
## 4430 11
barcodeplot(statistics = topDEgenes$logFC, index= idfound,
xlab= "logFC", main="hsa04215:Apoptosis Signaling pathways",
cex.main=1)
#12.path:hsa04140_Autophagy Signaling pathways
x.1<- x[x$PathwayID=="path:hsa04140",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)
## idfound
## FALSE TRUE
## 4390 51
barcodeplot(statistics = topDEgenes$logFC, index= idfound,
xlab= "logFC", main="hsa04140:Autophagy Signaling pathways",
cex.main=1)

#arranging columns based on mPS score
dim(OV_TCGA_GSE.Counts_All)
## [1] 10225 982
OV.Surv_Score<-OV.Surv_Score[order(OV.Surv_Score$lamda.min, decreasing=T),]
OV_TCGA_GSE.Counts_All<- OV_TCGA_GSE.Counts_All[, rownames(OV.Surv_Score)]
annotation_col = data.frame(
mPS=as.numeric(OV.Surv_Score$lamda.min),
mPS_Q=factor(OV.Surv_Score$lamda.min_Q),
mPS_QHL=factor(OV.Surv_Score$lamda.min_QHL)
)
annotation_col$mPS
## [1] 1.1782265834 0.9742625355 0.9621407932 0.9592872375 0.9551706872
## [6] 0.9530006589 0.8664655442 0.8637098494 0.8609184925 0.8508249698
## [11] 0.8426213875 0.8405285598 0.8293429513 0.8210808851 0.8075573060
## [16] 0.8050243967 0.8033739781 0.8001120326 0.8000884257 0.7958043464
## [21] 0.7890260585 0.7845767088 0.7822693028 0.7801785385 0.7581872692
## [26] 0.7562607026 0.7486241851 0.7485199273 0.7481539917 0.7445311485
## [31] 0.7419727410 0.7418344350 0.7404514046 0.7317391610 0.7306239520
## [36] 0.7304993633 0.7287573922 0.7269025495 0.7242863023 0.7178723458
## [41] 0.7175170882 0.7171148687 0.7154523345 0.7154318378 0.7101594405
## [46] 0.7058770818 0.7039343706 0.6933747102 0.6824321402 0.6678119177
## [51] 0.6649716019 0.6601878578 0.6555502043 0.6474825333 0.6451732009
## [56] 0.6445435252 0.6422830381 0.6419586328 0.6399693274 0.6362359633
## [61] 0.6342085762 0.6293001504 0.6285163848 0.6281869398 0.6265131396
## [66] 0.6201275943 0.6192512668 0.6180698702 0.6129249076 0.6127998098
## [71] 0.6117828673 0.6105292611 0.6088437753 0.6082959220 0.6081110395
## [76] 0.6072048551 0.6063685711 0.6050582492 0.6044847508 0.6030332538
## [81] 0.6022108863 0.5993433460 0.5989853850 0.5975445488 0.5926524309
## [86] 0.5925475353 0.5918906730 0.5913701551 0.5904267729 0.5890119021
## [91] 0.5885743798 0.5882741846 0.5881728951 0.5880565575 0.5873865229
## [96] 0.5850336134 0.5829824647 0.5816431534 0.5787497813 0.5773805841
## [101] 0.5766679791 0.5765878886 0.5747862989 0.5738546207 0.5725458721
## [106] 0.5718965878 0.5718857869 0.5714489756 0.5691909828 0.5687644053
## [111] 0.5656885659 0.5650258158 0.5643565123 0.5593352795 0.5570774937
## [116] 0.5551597430 0.5539041934 0.5520639655 0.5502657835 0.5500487490
## [121] 0.5490529163 0.5484574319 0.5466731068 0.5430740389 0.5430172469
## [126] 0.5429670483 0.5405773680 0.5401692017 0.5381952076 0.5379352558
## [131] 0.5373851478 0.5361231752 0.5355842209 0.5349885335 0.5336282241
## [136] 0.5328966781 0.5319542200 0.5319178929 0.5318616820 0.5317742126
## [141] 0.5314508401 0.5307940372 0.5295698003 0.5293372402 0.5290977627
## [146] 0.5278213711 0.5265099210 0.5222887724 0.5185870715 0.5178240827
## [151] 0.5166211849 0.5144115237 0.5138326542 0.5136979132 0.5119483921
## [156] 0.5103119617 0.5091986500 0.5079900135 0.5050182733 0.5028266991
## [161] 0.5017024148 0.5005259562 0.4997734866 0.4996141700 0.4991811093
## [166] 0.4987935854 0.4987779470 0.4963038119 0.4959437350 0.4859578066
## [171] 0.4850107853 0.4830489299 0.4828302969 0.4826112281 0.4824091317
## [176] 0.4816926526 0.4803047507 0.4794053655 0.4793718578 0.4756748235
## [181] 0.4742116266 0.4725935610 0.4724100626 0.4723048476 0.4718440373
## [186] 0.4715170909 0.4701706358 0.4694613295 0.4676392077 0.4661387553
## [191] 0.4641708160 0.4635599935 0.4634056593 0.4633440330 0.4612194892
## [196] 0.4590667252 0.4581887523 0.4580726673 0.4575887418 0.4575857330
## [201] 0.4572924855 0.4570094219 0.4560481252 0.4548299136 0.4544355969
## [206] 0.4539856666 0.4537488677 0.4529844193 0.4515764828 0.4512234128
## [211] 0.4511787794 0.4484552982 0.4476079640 0.4448422181 0.4445005514
## [216] 0.4429727198 0.4427721652 0.4418358988 0.4400918312 0.4397112323
## [221] 0.4390059254 0.4380512874 0.4373172946 0.4354654145 0.4353000973
## [226] 0.4352011221 0.4343031083 0.4336350561 0.4323548273 0.4316327800
## [231] 0.4313418818 0.4308574779 0.4308174735 0.4303962872 0.4280892049
## [236] 0.4266706569 0.4255468681 0.4235406649 0.4228761623 0.4221363998
## [241] 0.4215576305 0.4175716593 0.4170354214 0.4158458070 0.4155858703
## [246] 0.4153999868 0.4141239121 0.4118329821 0.4112185235 0.4107497399
## [251] 0.4104352029 0.4102810455 0.4096366582 0.4094996078 0.4087215298
## [256] 0.4087057321 0.4081298597 0.4072016433 0.4068512515 0.4068489029
## [261] 0.4067677094 0.4059307882 0.4055276631 0.4054963963 0.4034890596
## [266] 0.4030211808 0.4017713988 0.4012020017 0.4011870178 0.3994539376
## [271] 0.3992454119 0.3989935048 0.3982935399 0.3965768268 0.3955300521
## [276] 0.3953870621 0.3950829371 0.3949347663 0.3934643489 0.3933938929
## [281] 0.3914450409 0.3902689134 0.3886775995 0.3882211471 0.3879460956
## [286] 0.3869450471 0.3862530664 0.3860074031 0.3836490601 0.3833877100
## [291] 0.3832731698 0.3825307235 0.3822386197 0.3820319150 0.3818813415
## [296] 0.3815307093 0.3810986695 0.3807658145 0.3807520660 0.3794541794
## [301] 0.3790954108 0.3782627257 0.3782590847 0.3781014967 0.3779839350
## [306] 0.3773111280 0.3764109491 0.3760185629 0.3752525903 0.3739058880
## [311] 0.3736589315 0.3736169488 0.3735514257 0.3734887715 0.3717767377
## [316] 0.3715847206 0.3703233159 0.3692072431 0.3684812716 0.3679996556
## [321] 0.3678702689 0.3676865171 0.3673917597 0.3666426633 0.3654533052
## [326] 0.3649920706 0.3647268009 0.3638990641 0.3632316025 0.3627744926
## [331] 0.3600263691 0.3599526150 0.3597232356 0.3594246610 0.3592131577
## [336] 0.3577666659 0.3571680908 0.3571137952 0.3563705475 0.3554120480
## [341] 0.3541349938 0.3538142786 0.3531403537 0.3530370694 0.3524030733
## [346] 0.3519880801 0.3519118485 0.3518324038 0.3508916393 0.3506828200
## [351] 0.3506552505 0.3504533290 0.3480151041 0.3471324204 0.3468948498
## [356] 0.3468568199 0.3465333347 0.3459012782 0.3458432333 0.3453976818
## [361] 0.3446613519 0.3442303207 0.3439475344 0.3437635184 0.3429148530
## [366] 0.3424796563 0.3410600329 0.3403843921 0.3396670872 0.3394474883
## [371] 0.3394289424 0.3393898257 0.3377579706 0.3366424026 0.3352400057
## [376] 0.3346416795 0.3345611215 0.3341220642 0.3327175920 0.3324802897
## [381] 0.3309794466 0.3305897174 0.3302147012 0.3295497088 0.3291691584
## [386] 0.3288165622 0.3285667281 0.3267929143 0.3265659436 0.3246935004
## [391] 0.3243623285 0.3235561808 0.3234820316 0.3233615260 0.3233175588
## [396] 0.3203156942 0.3196572834 0.3191573189 0.3182451357 0.3173080572
## [401] 0.3172424153 0.3159617767 0.3152998070 0.3151400913 0.3141587611
## [406] 0.3134587381 0.3124064712 0.3115831421 0.3112408764 0.3109271059
## [411] 0.3106064723 0.3099551224 0.3080008145 0.3079944878 0.3078945376
## [416] 0.3073779401 0.3056495116 0.3037639763 0.3032360067 0.3023203606
## [421] 0.3022060230 0.3021603335 0.3017725607 0.3014043124 0.3011805622
## [426] 0.3004068446 0.3002473509 0.2961302898 0.2959678110 0.2958015362
## [431] 0.2946017509 0.2941873294 0.2939715246 0.2928123101 0.2927168366
## [436] 0.2920608676 0.2911988490 0.2901967376 0.2881088746 0.2873055366
## [441] 0.2868730541 0.2859093643 0.2854452532 0.2851032628 0.2846259915
## [446] 0.2824314527 0.2820866142 0.2811931742 0.2811252443 0.2807221393
## [451] 0.2801989369 0.2801318649 0.2800262722 0.2791501390 0.2784189915
## [456] 0.2778449303 0.2773333477 0.2770937472 0.2761998872 0.2722656240
## [461] 0.2714833909 0.2703430516 0.2702892342 0.2702392658 0.2701327759
## [466] 0.2700474172 0.2696277359 0.2693852955 0.2683426366 0.2668735312
## [471] 0.2667270439 0.2663577982 0.2652907635 0.2652382522 0.2638999221
## [476] 0.2633712510 0.2628675428 0.2602835749 0.2601046509 0.2597847119
## [481] 0.2595501765 0.2593934274 0.2581736557 0.2581700590 0.2574889206
## [486] 0.2553922086 0.2552797002 0.2549838388 0.2529589855 0.2519047647
## [491] 0.2507572508 0.2498272037 0.2485062619 0.2478398067 0.2477550553
## [496] 0.2472787331 0.2457522294 0.2447098267 0.2446114379 0.2445548336
## [501] 0.2444888416 0.2440945563 0.2419831877 0.2417273483 0.2406980543
## [506] 0.2405293424 0.2404364954 0.2400105158 0.2397433814 0.2396799433
## [511] 0.2382119441 0.2380275501 0.2376628436 0.2360454422 0.2354824450
## [516] 0.2339077113 0.2337601801 0.2334197248 0.2325396785 0.2313509834
## [521] 0.2311853123 0.2311071399 0.2310550612 0.2310401619 0.2297069582
## [526] 0.2294211524 0.2290084683 0.2280410566 0.2269880585 0.2254537619
## [531] 0.2246044709 0.2236113541 0.2233473532 0.2233207730 0.2232793752
## [536] 0.2230380883 0.2224452549 0.2218739812 0.2218057188 0.2216892212
## [541] 0.2210514461 0.2210157024 0.2209638644 0.2198697887 0.2198130388
## [546] 0.2195385209 0.2180797252 0.2173512274 0.2170133835 0.2161431930
## [551] 0.2153776908 0.2132494608 0.2118165565 0.2114983588 0.2092178463
## [556] 0.2090544346 0.2084371052 0.2079274982 0.2073648331 0.2069820739
## [561] 0.2062634191 0.2062403258 0.2033864034 0.2020957881 0.2015373910
## [566] 0.2014335942 0.2012669541 0.1994517868 0.1979538499 0.1978013529
## [571] 0.1974763681 0.1972380094 0.1957085339 0.1956035912 0.1944864763
## [576] 0.1944140178 0.1943798147 0.1942104725 0.1932615990 0.1929491348
## [581] 0.1929355766 0.1919382207 0.1917929546 0.1898447102 0.1893171086
## [586] 0.1888911747 0.1875123320 0.1870744219 0.1870583993 0.1859666269
## [591] 0.1856449725 0.1853272695 0.1852725214 0.1848322410 0.1845264178
## [596] 0.1834193621 0.1834113117 0.1827933313 0.1803753074 0.1800183562
## [601] 0.1795195026 0.1789263522 0.1787582773 0.1765957710 0.1765020675
## [606] 0.1758499757 0.1747257478 0.1741553941 0.1736273185 0.1734752927
## [611] 0.1734384821 0.1711977842 0.1690217946 0.1682364353 0.1675652039
## [616] 0.1662936957 0.1651768534 0.1650765745 0.1648855935 0.1643299132
## [621] 0.1638131663 0.1636840905 0.1635583980 0.1630589553 0.1623780875
## [626] 0.1602825296 0.1600808687 0.1600333310 0.1590259954 0.1590215829
## [631] 0.1580714900 0.1576159928 0.1574214488 0.1572622911 0.1566467251
## [636] 0.1559315165 0.1550861223 0.1547064436 0.1537414799 0.1536031362
## [641] 0.1530932318 0.1519378937 0.1518577163 0.1516915155 0.1512544732
## [646] 0.1480561071 0.1477424536 0.1475984849 0.1464254703 0.1464135710
## [651] 0.1456580059 0.1435091803 0.1433036321 0.1427672539 0.1410487269
## [656] 0.1409365537 0.1408916529 0.1404887763 0.1387920976 0.1383855160
## [661] 0.1380545992 0.1378195993 0.1376661003 0.1372128362 0.1371363326
## [666] 0.1364831519 0.1353430826 0.1347984870 0.1346450284 0.1346137367
## [671] 0.1319665840 0.1318896884 0.1293816370 0.1288214984 0.1264599301
## [676] 0.1263151870 0.1259975951 0.1258695008 0.1249112401 0.1237706912
## [681] 0.1237040504 0.1233057562 0.1230431818 0.1229420393 0.1223853892
## [686] 0.1214564782 0.1208880276 0.1202206609 0.1192149512 0.1188968485
## [691] 0.1183535491 0.1169367582 0.1163529252 0.1162713238 0.1158727264
## [696] 0.1149517025 0.1148784291 0.1148705814 0.1132989708 0.1132538917
## [701] 0.1119466917 0.1117838990 0.1117306701 0.1108235933 0.1103070587
## [706] 0.1100837096 0.1095926184 0.1094195839 0.1091692897 0.1090869407
## [711] 0.1088843278 0.1084630989 0.1084142588 0.1078661481 0.1074334108
## [716] 0.1074143757 0.1071578158 0.1070422766 0.1068507532 0.1052474774
## [721] 0.1044367158 0.1041347254 0.1039287563 0.1032474028 0.1024844361
## [726] 0.1000595662 0.0987138687 0.0985269905 0.0984799133 0.0973250663
## [731] 0.0967031294 0.0944285930 0.0931377582 0.0912339857 0.0910567377
## [736] 0.0901650309 0.0889590850 0.0872514633 0.0866660134 0.0865855071
## [741] 0.0858351603 0.0857888447 0.0843083987 0.0842533047 0.0835931077
## [746] 0.0831015207 0.0830587188 0.0828541072 0.0816691197 0.0814156505
## [751] 0.0814129053 0.0813805535 0.0797736454 0.0797270423 0.0777693303
## [756] 0.0777623577 0.0776816297 0.0771878401 0.0764744986 0.0753210353
## [761] 0.0703215897 0.0691591779 0.0690544048 0.0671701843 0.0660158292
## [766] 0.0656250837 0.0655396056 0.0654031189 0.0649682570 0.0637908306
## [771] 0.0623014205 0.0614831595 0.0606548649 0.0597958071 0.0593417330
## [776] 0.0585614464 0.0584453537 0.0568748599 0.0561119690 0.0557193261
## [781] 0.0554110236 0.0542831342 0.0542630649 0.0531436630 0.0522198132
## [786] 0.0513444564 0.0493307344 0.0469224200 0.0468711763 0.0465352407
## [791] 0.0451358513 0.0450907913 0.0448457841 0.0446400743 0.0445159673
## [796] 0.0413595538 0.0391529770 0.0373120770 0.0366231918 0.0336880040
## [801] 0.0316459624 0.0312996173 0.0310675636 0.0295148959 0.0289543454
## [806] 0.0281205702 0.0275728869 0.0254882685 0.0250705334 0.0237867918
## [811] 0.0233783422 0.0221207239 0.0219986556 0.0215581695 0.0207563122
## [816] 0.0202852678 0.0158214884 0.0134469372 0.0124686402 0.0122678940
## [821] 0.0118137556 0.0109645494 0.0068609495 0.0055530162 0.0047741233
## [826] 0.0038125624 0.0031415837 0.0012224256 0.0005809713 0.0005123169
## [831] -0.0008166294 -0.0013639825 -0.0025423853 -0.0026929558 -0.0039823150
## [836] -0.0047527818 -0.0051450141 -0.0065122242 -0.0069104543 -0.0071005801
## [841] -0.0072400297 -0.0075342198 -0.0105540616 -0.0106323021 -0.0111242360
## [846] -0.0119398724 -0.0123694113 -0.0124408091 -0.0124975564 -0.0186264932
## [851] -0.0214997762 -0.0217440427 -0.0218670944 -0.0263171488 -0.0341890993
## [856] -0.0342667284 -0.0344997323 -0.0348272237 -0.0367457580 -0.0377356517
## [861] -0.0386110405 -0.0387889615 -0.0391235913 -0.0407804967 -0.0413084844
## [866] -0.0434121884 -0.0455470783 -0.0462323217 -0.0480155974 -0.0485062684
## [871] -0.0494706903 -0.0497303958 -0.0501862179 -0.0511803119 -0.0525105488
## [876] -0.0542340685 -0.0557802857 -0.0565144187 -0.0569201544 -0.0570846436
## [881] -0.0572457738 -0.0594330054 -0.0600589237 -0.0612300420 -0.0612376663
## [886] -0.0636715033 -0.0647785937 -0.0664803547 -0.0691765469 -0.0705784689
## [891] -0.0707420002 -0.0714183346 -0.0723145022 -0.0730460766 -0.0731578080
## [896] -0.0736467685 -0.0744905696 -0.0745510193 -0.0749353628 -0.0773518204
## [901] -0.0786399966 -0.0789874393 -0.0806376363 -0.0835055100 -0.0847773610
## [906] -0.0911112395 -0.0928303867 -0.0965177754 -0.0973519238 -0.0975965277
## [911] -0.0977497999 -0.0978402717 -0.0992522951 -0.1026482087 -0.1040928457
## [916] -0.1049207516 -0.1054756439 -0.1055289357 -0.1055351273 -0.1067349179
## [921] -0.1110542638 -0.1112869194 -0.1113882814 -0.1121190257 -0.1189274026
## [926] -0.1194740088 -0.1206689988 -0.1207859942 -0.1248770292 -0.1257310468
## [931] -0.1269940186 -0.1290933468 -0.1341899745 -0.1379386084 -0.1386841426
## [936] -0.1394628297 -0.1436112476 -0.1478528240 -0.1493875184 -0.1501635001
## [941] -0.1616904918 -0.1658234676 -0.1668788975 -0.1690534325 -0.1698156102
## [946] -0.1703117193 -0.1716959622 -0.1734493967 -0.1790781758 -0.1895386836
## [951] -0.1961735600 -0.2014447238 -0.2023641429 -0.2048938819 -0.2071694286
## [956] -0.2077327065 -0.2202263256 -0.2225239289 -0.2265855710 -0.2275710570
## [961] -0.2313608439 -0.2366489703 -0.2433134489 -0.2561980809 -0.2564799647
## [966] -0.2585569727 -0.2707344269 -0.2792153490 -0.2949213840 -0.3051361496
## [971] -0.3127248418 -0.3169822939 -0.3172382123 -0.3277747027 -0.3414482559
## [976] -0.3481379614 -0.3752930118 -0.3766705430 -0.3924142210 -0.4471581365
## [981] -0.5242650558 -0.5273423988
rownames(annotation_col)<- rownames(OV.Surv_Score)
# Specify colors
ann_colors = list(
mPS_QHL= c("Low"="black", "High"="red"),
mPS_Q= c("Q1"="black","Q2"="yellow", "Q3"="orange", "Q4"= "red")
)
topDEgenes<- topDEgenes[order(topDEgenes$logFC, decreasing=T),]
dim(topDEgenes)
## [1] 4441 7
summary(decideTests(fit.ebayes))
## (Intercept) OV.Surv_Score$batchGSE14764 OV.Surv_Score$batchGSE18520
## Down 0 0 290
## NotSig 0 10225 9882
## Up 10225 0 53
## OV.Surv_Score$batchGSE26193 OV.Surv_Score$batchGSE26712
## Down 0 11
## NotSig 10225 10192
## Up 0 22
## OV.Surv_Score$batchGSE63855 OV.Surv_Score$lamda.min_QHLHigh
## Down 0 2453
## NotSig 10225 5784
## Up 0 1988
pheatmap(OV_TCGA_GSE.Counts_All[rownames(topDEgenes)[c(1:20, nrow(topDEgenes):(nrow(topDEgenes)-20))],],
color=colfunc(ncol(OV_TCGA_GSE.Counts_All)),
scale= "row", clustering_method = "ward.D2",
cluster_cols = F,cluster_rows = F,
annotation_col = annotation_col, annotation_colors = ann_colors,
show_colnames = F,
main= "Top20DE genes good vs poor prognosis")

#MDSplot
par(mfrow= c(1,2))
my.colors<- c("black","blue", "green","red")
set.seed(123)
plotMDS(OV_TCGA_GSE.Counts_All[rownames(topDEgenes),], col= my.colors[c(1,4)][OV.Surv_Score$lamda.min_QHL], pch= 20)
legend("topleft", legend= levels(OV.Surv_Score$lamda.min_QHL), col=my.colors[c(1,4)], pch=19 )
set.seed(123)
plotMDS(OV_TCGA_GSE.Counts_All[rownames(topDEgenes),], col= my.colors[1:4][OV.Surv_Score$lamda.min_Q], pch= 20)
legend("topleft", legend= levels(OV.Surv_Score$lamda.min_Q), col=my.colors[1:4], pch=19 )

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] TCGAbiolinks_2.20.1 RColorBrewer_1.1-2
## [3] SummarizedExperiment_1.22.0 GenomicRanges_1.44.0
## [5] GenomeInfoDb_1.28.4 MatrixGenerics_1.4.3
## [7] matrixStats_0.61.0 pheatmap_1.0.12
## [9] survivalROC_1.0.3 ROCR_1.0-11
## [11] glmnet_4.1-3 Matrix_1.4-0
## [13] org.Hs.eg.db_3.13.0 AnnotationDbi_1.54.1
## [15] IRanges_2.26.0 S4Vectors_0.30.2
## [17] Biobase_2.52.0 BiocGenerics_0.38.0
## [19] limma_3.48.3 survminer_0.4.9
## [21] ggpubr_0.4.0 ggplot2_3.3.5
## [23] survival_3.2-13
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-2 ggsignif_0.6.3
## [3] ellipsis_0.3.2 XVector_0.32.0
## [5] gridtext_0.1.4 ggtext_0.1.1
## [7] rstudioapi_0.13 farver_2.1.0
## [9] bit64_4.0.5 fansi_1.0.2
## [11] xml2_1.3.3 R.methodsS3_1.8.1
## [13] codetools_0.2-18 splines_4.1.0
## [15] cachem_1.0.6 knitr_1.37
## [17] jsonlite_1.7.3 broom_0.7.12
## [19] km.ci_0.5-2 dbplyr_2.1.1
## [21] GO.db_3.13.0 R.oo_1.24.0
## [23] png_0.1-7 readr_2.1.2
## [25] compiler_4.1.0 httr_1.4.2
## [27] backports_1.4.1 assertthat_0.2.1
## [29] fastmap_1.1.0 cli_3.1.1
## [31] prettyunits_1.1.1 htmltools_0.5.2
## [33] tools_4.1.0 gtable_0.3.0
## [35] glue_1.6.1 GenomeInfoDbData_1.2.6
## [37] dplyr_1.0.7 rappdirs_0.3.3
## [39] Rcpp_1.0.8 carData_3.0-5
## [41] jquerylib_0.1.4 vctrs_0.3.8
## [43] Biostrings_2.60.2 iterators_1.0.13
## [45] xfun_0.29 stringr_1.4.0
## [47] rvest_1.0.2 lifecycle_1.0.1
## [49] XML_3.99-0.8 statmod_1.4.36
## [51] rstatix_0.7.0 zlibbioc_1.38.0
## [53] zoo_1.8-9 scales_1.1.1
## [55] vroom_1.5.7 hms_1.1.1
## [57] curl_4.3.2 yaml_2.2.2
## [59] memoise_2.0.1 gridExtra_2.3
## [61] downloader_0.4 KMsurv_0.1-5
## [63] sass_0.4.0 biomaRt_2.48.3
## [65] stringi_1.7.6 RSQLite_2.2.9
## [67] highr_0.9 foreach_1.5.1
## [69] filelock_1.0.2 shape_1.4.6
## [71] rlang_1.0.0 pkgconfig_2.0.3
## [73] bitops_1.0-7 evaluate_0.14
## [75] TCGAbiolinksGUI.data_1.12.0 lattice_0.20-45
## [77] purrr_0.3.4 labeling_0.4.2
## [79] cowplot_1.1.1 bit_4.0.4
## [81] tidyselect_1.1.1 plyr_1.8.6
## [83] magrittr_2.0.2 R6_2.5.1
## [85] generics_0.1.1 DelayedArray_0.18.0
## [87] DBI_1.1.2 pillar_1.6.5
## [89] withr_2.4.3 KEGGREST_1.32.0
## [91] abind_1.4-5 RCurl_1.98-1.5
## [93] tibble_3.1.6 crayon_1.4.2
## [95] car_3.0-12 survMisc_0.5.5
## [97] utf8_1.2.2 BiocFileCache_2.0.0
## [99] tzdb_0.2.0 rmarkdown_2.11
## [101] progress_1.2.2 grid_4.1.0
## [103] data.table_1.14.2 blob_1.2.2
## [105] digest_0.6.29 xtable_1.8-4
## [107] tidyr_1.1.4 R.utils_2.11.0
## [109] munsell_0.5.0 bslib_0.3.1