CoxglmnetPredictorgenes_Final

###############################################################################
#The molecular prognostic score, a classifier for risk straification of HGSOC#
#Sarkar et al, E-mail: siddik.sarkar@iicbres.in
###############################################################################


#TCGA_GEO Ovarian cancer database
library(survival)
library(survminer)

## Loading required package: ggplot2

## Warning in register(): Can't find generic `scale_type` in package ggplot2 to
## register S3 method.

## Loading required package: ggpubr

## 
## Attaching package: 'survminer'

## The following object is masked from 'package:survival':
## 
##     myeloma

library(limma)
library(ggpubr)
library(AnnotationDbi)

## Loading required package: stats4

## Loading required package: BiocGenerics

## Loading required package: parallel

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following object is masked from 'package:limma':
## 
##     plotMA

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## Loading required package: IRanges

## Loading required package: S4Vectors

## Warning: package 'S4Vectors' was built under R version 4.1.1

## 
## Attaching package: 'S4Vectors'

## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname

library(org.Hs.eg.db)

##

library(glmnet)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following object is masked from 'package:S4Vectors':
## 
##     expand

## Loaded glmnet 4.1-3

library(ROCR)
library(survivalROC)
library(pheatmap)

#####################################################
#Collect RDS that are already saved in other folders#
#####################################################

GSE_26712 <- readRDS("~/Desktop/IICB/Glmnet_SurvivalGenes/SE_26712.rds")
GSE_18520 <- readRDS("~/Desktop/IICB/Glmnet_SurvivalGenes/SE_GSE18520.rds")
TCGA.OV <- readRDS("~/Desktop/IICB/Glmnet_SurvivalGenes/TCGA.OV.rds")
GSE_26193<- readRDS("~/Desktop/IICB/Glmnet_SurvivalGenes/GSE26193.rds")
GSE_14764<- readRDS("~/Desktop/IICB/Glmnet_SurvivalGenes/GSE14764.rds")
GSE_63885<- readRDS("~/Desktop/IICB/Glmnet_SurvivalGenes/GSE63885.rds")



#Remove duplicate gene_ids
o<- order(rowSums(TCGA.OV@assays@data$normalized), decreasing = T)
TCGA.OV<- TCGA.OV[o,]

## Loading required package: SummarizedExperiment

## Loading required package: MatrixGenerics

## Warning: package 'MatrixGenerics' was built under R version 4.1.1

## Loading required package: matrixStats

## 
## Attaching package: 'matrixStats'

## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians

## 
## Attaching package: 'MatrixGenerics'

## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars

## The following object is masked from 'package:Biobase':
## 
##     rowMedians

## Loading required package: GenomicRanges

## Loading required package: GenomeInfoDb

## Warning: package 'GenomeInfoDb' was built under R version 4.1.1

head(assay(TCGA.OV))[, 1:10]

##        TCGA-29-1692-01B-01R-0808-01 TCGA-13-1488-01A-01R-0477-01
## EEF1A1                     12.88355                     12.62906
## RPS3A                      13.00679                     12.15549
## RPS3A                      13.00679                     12.15549
## RPS4X                      12.52005                     12.02120
## RPS27                      12.71712                     12.04733
## RPL39                      12.88478                     12.13985
##        TCGA-13-1483-01A-01R-0477-01 TCGA-24-2024-01A-02R-0668-01
## EEF1A1                     12.74414                     13.07086
## RPS3A                      12.83631                     13.13192
## RPS3A                      12.83631                     13.13192
## RPS4X                      12.98435                     12.92062
## RPS27                      12.66582                     12.92439
## RPL39                      12.80318                     12.75421
##        TCGA-13-0908-01B-01R-0453-01 TCGA-04-1341-01A-01R-0434-01
## EEF1A1                     12.33280                     12.43170
## RPS3A                      12.44088                     12.67937
## RPS3A                      12.44088                     12.67937
## RPS4X                      12.15694                     12.53605
## RPS27                      12.22548                     12.79412
## RPL39                      12.45119                     12.63017
##        TCGA-13-1489-02A-01R-0808-01 TCGA-36-1575-01A-01R-0538-01
## EEF1A1                     13.02541                     12.65636
## RPS3A                      13.02833                     12.51563
## RPS3A                      13.02833                     12.51563
## RPS4X                      12.86771                     12.24898
## RPS27                      13.03593                     12.08975
## RPL39                      13.19170                     12.06427
##        TCGA-13-0803-01A-01R-0362-01 TCGA-25-2408-01A-01R-0709-01
## EEF1A1                     12.61793                     12.89962
## RPS3A                      12.23207                     12.77040
## RPS3A                      12.23207                     12.77040
## RPS4X                      12.48291                     12.94298
## RPS27                      12.25968                     12.78388
## RPL39                      12.87438                     12.70745

d<- duplicated(rownames(TCGA.OV))
table(d)

## d
## FALSE  TRUE 
## 10878   702

TCGA.OV<- TCGA.OV[!d,]
###############################
#Similarly for GSE datasets#
###############################
o<- order(rowSums(assay(GSE_26712)), decreasing = T)
GSE_26712<- GSE_26712[o,]
head(assay(GSE_26712))[, 1:10]

##        GSM657519_HOSE2237 GSM657520_HOSE2008 GSM657521_HOSE2061
## EEF1A1           13.47900           13.47900           13.11366
## TPT1             13.29893           13.32458           12.68414
## RPL37A           13.00495           12.96124           12.45180
## RPS18            12.95925           12.58468           12.36975
## RPL41            13.17338           13.16965           12.76300
## RPL13A           12.34042           12.05152           11.85875
##        GSM657522_HOSE2064 GSM657523_HOSE2085 GSM657524_HOSE2225
## EEF1A1           13.22784           12.90234           13.33805
## TPT1             12.89589           12.68356           13.35606
## RPL37A           12.71304           12.45182           12.93762
## RPS18            12.28234           11.85091           12.91837
## RPL41            13.10460           12.59513           13.20533
## RPL13A           11.77486           11.59822           12.12697
##        GSM657525_HOSE2226 GSM657526_HOSE2228 GSM657527_HOSE2230
## EEF1A1           13.41927           13.38104           13.27282
## TPT1             13.27536           13.31731           13.25633
## RPL37A           13.01197           12.94455           12.77350
## RPS18            12.85196           12.78197           12.82530
## RPL41            13.13413           13.19052           13.16812
## RPL13A           12.06184           12.20157           12.11931
##        GSM657528_HOSE2234
## EEF1A1           13.48788
## TPT1             13.56379
## RPL37A           12.92421
## RPS18            13.03841
## RPL41            13.12579
## RPL13A           12.38129

d<- duplicated(rownames(GSE_26712))
table(d)

## d
## FALSE 
## 12645

GSE_26712<- GSE_26712[!d,]

o<- order(rowSums(assay(GSE_18520)), decreasing = T)
GSE_18520<- GSE_18520[o,]
head(assay(GSE_18520))[, 1:10]

##        GSM461348.CEL.gz GSM461349.CEL.gz GSM461350.CEL.gz GSM461351.CEL.gz
## RPL41          14.27943         14.21090         13.90982         14.38014
## EEF1A1         14.22060         13.68581         13.72749         14.34786
## RPL37A         13.98879         13.78062         13.50181         14.08039
## ND4            14.13128         13.68930         13.36868         14.16025
## TPT1           14.12233         13.59100         13.18768         14.05185
## RPS20          13.84227         13.86572         13.32865         13.99806
##        GSM461352.CEL.gz GSM461353.CEL.gz GSM461354.CEL.gz GSM461355.CEL.gz
## RPL41          14.40031         14.37727         14.18398         14.12652
## EEF1A1         13.67195         14.27973         14.26522         13.92270
## RPL37A         13.71422         13.98134         14.14058         13.69619
## ND4            13.83797         13.51943         14.49082         13.54138
## TPT1           13.67562         13.65820         13.95461         13.54869
## RPS20          13.63017         13.69952         13.77505         13.76030
##        GSM461356.CEL.gz GSM461357.CEL.gz
## RPL41          13.75686         14.72293
## EEF1A1         13.56270         14.59740
## RPL37A         13.39989         14.44599
## ND4            13.36689         14.31416
## TPT1           13.54526         14.27828
## RPS20          13.18255         14.06773

d<- duplicated(rownames(GSE_18520))
table(d)

## d
## FALSE 
## 20864

GSE_18520<- GSE_18520[!d,]

o<- order(rowSums(exprs(GSE_26193)), decreasing = T)
GSE_26193<- GSE_26193[o,]
head(exprs(GSE_26193))[, 1:10]

##        GSM642933_OC01_ARN0001_s1h1s1_U133p2.CEL.gz
## ND4                                       14.18139
## COX1                                      13.58479
## RPL41                                     14.16913
## ATP6                                      14.09860
## EEF1A1                                    14.12426
## RPL37A                                    13.95109
##        GSM642935_OC01_ARN0004_s1h1s1_U133p2.CEL.gz
## ND4                                       14.13805
## COX1                                      13.85712
## RPL41                                     14.24466
## ATP6                                      13.90537
## EEF1A1                                    14.01791
## RPL37A                                    14.08491
##        GSM642936_OC01_ARN0005_s1h1s1_U133p2.CEL.gz
## ND4                                       14.21738
## COX1                                      13.85340
## RPL41                                     14.10495
## ATP6                                      14.01890
## EEF1A1                                    14.09997
## RPL37A                                    13.95778
##        GSM642937_OC01_ARN0007_s1h1s1_U133p2.CEL.gz
## ND4                                       14.34638
## COX1                                      14.19546
## RPL41                                     14.05138
## ATP6                                      14.20252
## EEF1A1                                    13.94882
## RPL37A                                    13.95467
##        GSM642938_OC01_ARN0008_s1h1s1_U133p2.CEL.gz
## ND4                                       13.73810
## COX1                                      14.03803
## RPL41                                     14.40365
## ATP6                                      13.81328
## EEF1A1                                    14.13210
## RPL37A                                    14.03432
##        GSM642939_OC01_ARN0009_s1h1s1_U133p2.CEL.gz
## ND4                                       14.44950
## COX1                                      14.08644
## RPL41                                     14.08412
## ATP6                                      14.41309
## EEF1A1                                    13.87356
## RPL37A                                    14.04848
##        GSM642940_OC01_ARN0011_s1h1s1_U133p2.CEL.gz
## ND4                                       14.29389
## COX1                                      13.92715
## RPL41                                     14.08640
## ATP6                                      14.26453
## EEF1A1                                    14.15267
## RPL37A                                    14.01831
##        GSM642941_OC01_ARN0012_s1h1s1_U133p2.CEL.gz
## ND4                                       13.97111
## COX1                                      13.57598
## RPL41                                     14.22355
## ATP6                                      13.88233
## EEF1A1                                    14.13613
## RPL37A                                    13.92573
##        GSM642943_OC01_ARN0014_s1h1s1_U133p2.CEL.gz
## ND4                                       14.29530
## COX1                                      13.96928
## RPL41                                     14.00965
## ATP6                                      14.18433
## EEF1A1                                    13.84907
## RPL37A                                    13.83259
##        GSM642944_OC01_ARN0015_s1h1s1_U133p2.CEL.gz
## ND4                                       14.15642
## COX1                                      13.92579
## RPL41                                     14.13404
## ATP6                                      14.14130
## EEF1A1                                    13.88436
## RPL37A                                    14.02862

d<- duplicated(rownames(GSE_26193))
table(d)

## d
## FALSE 
## 20864

GSE_26193<- GSE_26193[!d,]

o<- order(rowSums(exprs(GSE_14764)), decreasing = T)
GSE_14764<- GSE_14764[o,]
head(exprs(GSE_14764))[, 1:10]

##        GSM368661.cel.gz GSM368662.cel.gz GSM368664.cel.gz GSM368665.cel.gz
## RPL41          13.36209         13.37945         13.68061         13.66169
## EEF1A1         13.05101         13.10914         13.45291         13.09676
## RPL23A         13.21419         13.29762         13.22367         13.25454
## RPL37A         13.25228         12.70039         13.12462         12.87569
## RPL13A         13.16902         12.88275         13.11861         13.03930
## RPS18          12.82202         12.88801         13.42103         12.85953
##        GSM368666.cel.gz GSM368667.cel.gz GSM368668.cel.gz GSM368670.cel.gz
## RPL41          13.64596         13.61178         13.55445         13.62603
## EEF1A1         13.64948         13.44657         13.26581         13.48810
## RPL23A         13.50547         13.62354         13.54157         13.41085
## RPL37A         13.23872         12.50211         12.49782         13.12422
## RPL13A         13.21223         12.65268         12.64409         12.76070
## RPS18          13.24162         12.95164         12.65370         12.96781
##        GSM368671.cel.gz GSM368673.cel.gz
## RPL41          13.75162         13.64442
## EEF1A1         13.65655         13.59476
## RPL23A         13.52068         13.67517
## RPL37A         13.14057         13.17259
## RPL13A         13.38848         13.38097
## RPS18          13.33783         13.12293

d<- duplicated(rownames(GSE_14764))
table(d)

## d
## FALSE 
## 12645

GSE_14764<- GSE_14764[!d,]

o<- order(rowSums(exprs(GSE_63885)), decreasing = T)
GSE_63885<- GSE_63885[o,]
head(exprs(GSE_63885))[, 1:10]

##        GSM1559299_P8.CEL.gz GSM1559300_P264.CEL.gz GSM1559301_T348.CEL.gz
## RPL41              14.14668               14.29164               14.18414
## RPL37A             14.16374               14.06431               14.12732
## RPS18              14.05659               13.84077               13.84848
## EEF1A1             13.99776               14.02816               14.15655
## ND4                14.07159               14.06878               14.15734
## RPL13A             14.14679               13.26606               13.66358
##        GSM1559302_P67.CEL.gz GSM1559303_T175.CEL.gz GSM1559304_T360.CEL.gz
## RPL41               14.29392               14.26875               14.19185
## RPL37A              14.27454               14.21328               14.14387
## RPS18               14.03103               14.20205               14.11986
## EEF1A1              14.05293               14.13090               14.15776
## ND4                 14.01997               14.04059               13.75544
## RPL13A              13.99426               13.90925               13.99475
##        GSM1559305_T382.CEL.gz GSM1559306_P343.CEL.gz GSM1559307_T174.CEL.gz
## RPL41                14.15513               14.05765               14.32762
## RPL37A               14.13234               14.05478               14.26893
## RPS18                14.05817               14.21275               14.12396
## EEF1A1               13.96471               14.15797               14.10708
## ND4                  14.06721               14.08082               13.88717
## RPL13A               14.04631               13.99562               13.94295
##        GSM1559308_T398.CEL.gz
## RPL41                14.25283
## RPL37A               14.25449
## RPS18                14.15875
## EEF1A1               14.11421
## ND4                  14.09875
## RPL13A               14.07334

d<- duplicated(rownames(GSE_63885))
table(d)

## d
## FALSE 
## 20864

GSE_63885<- GSE_63885[!d,]

####################################
#Common genes in all datasets#
####################################
idfound<- rownames(TCGA.OV)%in% rownames(GSE_18520)
table(idfound)

## idfound
## FALSE  TRUE 
##   593 10285

TCGA.OV<- TCGA.OV[idfound,]

idfound<- rownames(TCGA.OV)%in% rownames(GSE_26712)
table(idfound)

## idfound
## FALSE  TRUE 
##    60 10225

TCGA.OV<- TCGA.OV[idfound,]

idfound<- rownames(TCGA.OV)%in% rownames(GSE_26193)
table(idfound)

## idfound
##  TRUE 
## 10225

TCGA.OV<- TCGA.OV[idfound,]

idfound<- rownames(TCGA.OV)%in% rownames(GSE_63885)
table(idfound)

## idfound
##  TRUE 
## 10225

TCGA.OV<- TCGA.OV[idfound,]

idfound<- rownames(TCGA.OV)%in% rownames(GSE_14764)
table(idfound)

## idfound
##  TRUE 
## 10225

TCGA.OV<- TCGA.OV[idfound,]
###########################################
#Arrange genes based on order of TCGA.OV#
###########################################
GSE_18520<- GSE_18520[rownames(TCGA.OV),]
GSE_26712<- GSE_26712[rownames(TCGA.OV),]
GSE_26193<- GSE_26193[rownames(TCGA.OV),]
GSE_63885<- GSE_63885[rownames(TCGA.OV),]
GSE_14764<- GSE_14764[rownames(TCGA.OV),]  

##########################################
# Pool the datas#
##########################################

Samples<- cbind.data.frame(TCGA= ncol(TCGA.OV),GSE_18520=ncol(GSE_18520),
                            GSE_26712=ncol(GSE_26712),GSE_26193=ncol(GSE_26193), 
                           GSE_63885=ncol(GSE_63885), GSE_14764= ncol(GSE_14764))
Samples$Total<- rowSums(Samples)
head(Samples)# 1022 Samples

##         TCGA GSE_18520 GSE_26712 GSE_26193 GSE_63885 GSE_14764 Total
## Samples  544        63       195        79        73        68  1022

y.common<- SummarizedExperiment(assays=list(
  counts= cbind(assay(TCGA.OV, "Signal"), assay(GSE_18520, "log2_RMA"),
                assay(GSE_26712, "log2_RMA"), exprs(GSE_26193), exprs(GSE_63885), exprs(GSE_14764)
  )),
  rowData = rowData(TCGA.OV)
  
)
dim(y.common)

## [1] 10225  1022

TCGA.OV$description<-  factor(TCGA.OV$primary_diagnosis)
TCGA.OV$shortLetterCode<- factor(TCGA.OV$shortLetterCode) # NT;NormalTissue, TP; TumorPrimary, TR; TumorRecurrent

GSE_18520$shortLetterCode <- GSE_18520$description
levels(GSE_18520$shortLetterCode)[2]<- "TP"  # PT changed to TP for consistency with TCGA abbreviations
GSE_18520$description<- GSE_18520$characteristics_ch1

GSE_26712$shortLetterCode<- factor(GSE_26712$source_name_ch1)
levels(GSE_26712$shortLetterCode)<- c("NT", "TP")

GSE_26193$shortLetterCode<- factor(GSE_26193$type)
levels(GSE_26193$shortLetterCode)<- "TP"
GSE_26193$description<- factor(GSE_26193$type)
levels(GSE_26193$description)<- "Serous"
levels(GSE_26193$vital_status)<- c("Alive", "Dead")
GSE_26193$time <- GSE_26193$time.in.days

colData(y.common)<-  rbind(colData(TCGA.OV)  [,c("shortLetterCode", "description", "time", "vital_status")],
                           colData(GSE_18520)[, c("shortLetterCode","description", "time", "vital_status")],
                           colData(GSE_26712)[, c("shortLetterCode","description", "time", "vital_status")],
                           pData(GSE_26193)  [, c("shortLetterCode","description", "time", "vital_status")],
                           pData(GSE_63885)  [, c("shortLetterCode","description", "time", "vital_status")],
                           pData(GSE_14764) [, c("shortLetterCode","description", "time", "vital_status")]
                           )

table(y.common$shortLetterCode)

## 
##  NT  TP  TR 
##  28 979  15

y.common$batch<- factor(c(rep("TCGA",times=ncol(TCGA.OV)),
                          rep("GSE18520",times=ncol(GSE_18520)), 
                          rep("GSE26712", times=ncol(GSE_26712)),
                          rep("GSE26193", times=ncol(GSE_26193)),
                          rep("GSE63855", times=ncol(GSE_63885)),
                          rep("GSE14764", times=ncol(GSE_14764)))
                        )
y.common$batch<- relevel(y.common$batch, ref= "TCGA")
rowData(y.common)$Symbol<-rowData(y.common)$gene_id 

dim(y.common) #10255 genes and 1022 samples

## [1] 10225  1022

assay(y.common, "normalized")<- normalizeBetweenArrays(assay(y.common, "counts")) # normalize array
summary(colSums(assay(y.common, "counts")))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   51062   54495   56356   61324   65567   89481

summary(colSums(assay(y.common, "normalized")))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   61324   61324   61324   61324   61324   61324

##################################################
###Remove batch effect#
##################################################
assay(y.common, "normalized_batch")<- removeBatchEffect(assay(y.common, "normalized"), batch= y.common$batch)

my.colors= c("black", "blue","green" ,"yellow", "orange", "red")
points<- c(1:3)
corr_tcga_gse<- cor(assay(y.common, "normalized"))
y.common$correlation<- corr_tcga_gse[,1]

corr_tcga_gse_batch<- cor(assay(y.common, "normalized_batch"))
y.common$correlation_batch<- corr_tcga_gse_batch[,1]

summary(y.common$correlation)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1309  0.7591  0.8727  0.8302  0.9126  1.0000

summary(y.common$correlation_batch)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.4003  0.8830  0.8996  0.8939  0.9130  1.0000

par(mfrow=c(1,2))
my.colors<- c("black", "blue", "green", "yellow", "orange", "red")
points<- c(1:3)

set.seed(123)
plotMDS(assay(y.common, "normalized"), col= my.colors[y.common$batch], pch= points[y.common$shortLetterCode],
        main= "MDSplot_TCGA_GSE_normalized")
legend("topright", legend= levels(y.common$batch), col=my.colors, pch=1, cex=0.75)
legend("topleft", legend= levels(y.common$shortLetterCode), col= "black", pch= points, cex=0.75)
table(y.common$shortLetterCode)

## 
##  NT  TP  TR 
##  28 979  15

set.seed(123)
plotMDS(assay(y.common, "normalized_batch"), col= my.colors[y.common$batch], pch= points[y.common$shortLetterCode],
        main= "MDSplot_TCGA_GSE_normalized_batch")

###############################
# corelation cut-off 0.7 #
##############################


y.common_cor<- y.common[,(y.common$correlation>=0.7)] # keep samples with min 70% similarity 
table(y.common_cor$shortLetterCode) #24 PT, 901 TP, 15 TR

## 
##  NT  TP  TR 
##  24 901  15

y.common_cor_batch<- y.common[,(y.common$correlation_batch>=0.7)]
table(y.common_cor_batch$shortLetterCode) #28 PT, 973 TP, 15 TR

## 
##  NT  TP  TR 
##  28 973  15

y.tcga.gse_cor<- assay(y.common_cor, "normalized")
dim(y.tcga.gse_cor) #10225 genes and 940 samples

## [1] 10225   940

y.tcga.gse_cor_batch<- assay(y.common_cor_batch, "normalized_batch")
dim(y.tcga.gse_cor_batch)#10255 genes and 1016 samples

## [1] 10225  1016

#####################
#design-matrix#
#####################

design<- model.matrix(~y.common_cor$batch+y.common_cor$shortLetterCode,  data= colData(y.common_cor))
colnames(design)

## [1] "(Intercept)"                    "y.common_cor$batchGSE14764"    
## [3] "y.common_cor$batchGSE18520"     "y.common_cor$batchGSE26193"    
## [5] "y.common_cor$batchGSE26712"     "y.common_cor$batchGSE63855"    
## [7] "y.common_cor$shortLetterCodeTP" "y.common_cor$shortLetterCodeTR"

#design_batch
design_batch<- model.matrix(~y.common_cor_batch$batch+y.common_cor_batch$shortLetterCode,  data= colData(y.common_cor_batch))
colnames(design_batch)

## [1] "(Intercept)"                         
## [2] "y.common_cor_batch$batchGSE14764"    
## [3] "y.common_cor_batch$batchGSE18520"    
## [4] "y.common_cor_batch$batchGSE26193"    
## [5] "y.common_cor_batch$batchGSE26712"    
## [6] "y.common_cor_batch$batchGSE63855"    
## [7] "y.common_cor_batch$shortLetterCodeTP"
## [8] "y.common_cor_batch$shortLetterCodeTR"

fit<- lmFit(y.tcga.gse_cor, design)
fit.treat<- treat(fit,fc=1.5, trend=T, robust = T)
summary(decideTests(fit.treat))

##        (Intercept) y.common_cor$batchGSE14764 y.common_cor$batchGSE18520
## Down             0                       2157                       2050
## NotSig           0                       5825                       6248
## Up           10225                       2243                       1927
##        y.common_cor$batchGSE26193 y.common_cor$batchGSE26712
## Down                         1500                       2361
## NotSig                       7265                       5488
## Up                           1460                       2376
##        y.common_cor$batchGSE63855 y.common_cor$shortLetterCodeTP
## Down                         1577                            604
## NotSig                       7079                           9214
## Up                           1569                            407
##        y.common_cor$shortLetterCodeTR
## Down                              387
## NotSig                           9583
## Up                                255

topTreat(fit.treat, coef=7)

##             logFC  AveExpr         t       P.Value     adj.P.Val
## PDE8B   -2.958343 4.657358 -24.83294 2.713855e-105 2.774917e-101
## ABCA8   -4.959095 4.353356 -20.10216  2.728834e-75  1.395116e-71
## FRY     -2.716927 4.718841 -19.67601  1.045609e-72  3.374259e-69
## BCHE    -4.546883 3.850600 -19.66099  1.320004e-72  3.374259e-69
## KDR     -2.746455 4.885296 -19.27283  2.836310e-70  5.800253e-67
## ALDH1A2 -4.634721 5.037018 -19.16503  1.285731e-69  2.191100e-66
## NELL2   -4.269591 5.491429 -18.21096  5.793290e-64  8.462341e-61
## PRG4    -2.322605 3.690822 -18.10170  2.514308e-63  3.213600e-60
## PROCR   -2.725052 5.083648 -16.61484  7.952715e-55  9.035168e-52
## REEP1   -3.216113 5.202065 -16.37232  1.796412e-53  1.836831e-50

#fit_batch
fit_batch<- lmFit(y.tcga.gse_cor_batch, design_batch)
fit.treat_batch<- treat(fit_batch,fc=1.5, trend=T, robust = T)
summary(decideTests(fit.treat_batch))

##        (Intercept) y.common_cor_batch$batchGSE14764
## Down             0                                0
## NotSig           0                            10225
## Up           10225                                0
##        y.common_cor_batch$batchGSE18520 y.common_cor_batch$batchGSE26193
## Down                                  0                                0
## NotSig                            10225                            10225
## Up                                    0                                0
##        y.common_cor_batch$batchGSE26712 y.common_cor_batch$batchGSE63855
## Down                                  0                                0
## NotSig                            10225                            10225
## Up                                    0                                0
##        y.common_cor_batch$shortLetterCodeTP
## Down                                    649
## NotSig                                 9163
## Up                                      413
##        y.common_cor_batch$shortLetterCodeTR
## Down                                    399
## NotSig                                 9592
## Up                                      234

topTreat(fit.treat_batch, coef=7)

##             logFC  AveExpr         t       P.Value     adj.P.Val
## PDE8B   -2.995180 4.539606 -26.02577 4.726398e-115 4.832742e-111
## KDR     -2.928108 4.789632 -22.48891  1.812459e-91  9.266199e-88
## PRG4    -2.588567 3.523736 -22.43230  4.377221e-91  1.491903e-87
## ABCA8   -5.050961 4.312954 -21.06813  2.773090e-82  7.088711e-79
## BCHE    -4.509686 3.757108 -20.97090  1.153730e-81  2.359377e-78
## FRY     -2.727236 4.659661 -20.58400  3.165120e-79  5.393891e-76
## ALDH1A2 -4.665667 5.283042 -20.45095  2.258316e-78  3.298755e-75
## NELL2   -4.198817 5.507578 -19.64696  2.357152e-73  3.012735e-70
## PROCR   -2.820136 4.489355 -18.95678  4.060233e-69  4.612876e-66
## CLEC4M  -2.276993 4.306947 -18.40005  9.381371e-66  9.592451e-63

VO<- cbind.data.frame(decideTests(fit.treat)[,7],decideTests(fit.treat_batch)[,7] )
colnames(VO)<- c("Norm", "batch_Norm")
vennDiagram(VO, include= c("up", "down"), circle.col = c("blue", "red"), counts.col = c("red", "blue"))

par(mfrow=c(1,2))

plotMD(fit.treat_batch, coef=7, status=decideTests(fit.treat_batch[,7]), main= "MA plot", legend= "bottomright")
abline(h=c(-log2(1.5),log2(1.5)), lty=2)

#Adding no. genes up and down inside the plot
text(4,2.75, summary(decideTests(fit.treat_batch[,7]))[1], col="red")
text(12,-2.75, summary(decideTests(fit.treat_batch[,7]))[3], col="blue")
#Adding gene details
egSymbol<- toTable(org.Hs.egSYMBOL)
m<- match(rownames(fit.treat_batch), egSymbol$symbol)
genedetails<- data.frame( genesymbol=rownames(fit.treat_batch), gene_id= egSymbol$gene_id[m],
                          status_PTvsNT=factor(decideTests(fit.treat_batch[,7])) )
fit.treat_batch[["genedetails"]]<-genedetails
head(fit.treat_batch$genedetails)

##   genesymbol gene_id status_PTvsNT
## 1     EEF1A1    1915             0
## 2      RPS3A    6189             0
## 3      RPS4X    6191             0
## 4      RPS27    6232             0
## 5      RPL39    6170             0
## 6      RPS23    6228             0

my.colors_vol<- c("blue", "black", "red")
my.points_vol<- c(1,0.25,1)
volcanoplot(fit.treat_batch, coef=7, names= rownames(fit.treat_batch), highlight = 10,
            col=my.colors_vol[fit.treat_batch$gene$status], hl.col = "black", 
            cex=my.points_vol[fit.treat_batch$gene$status_PTvsNT], main= "Volcanoplot" )
legend=legend("topright",legend= c("Notsig", "Up", "Down"), col= c("black", "red", "blue"), pch=20, pt.cex= c(0.25,1,1) )

abline(v=c(-log2(1.5), log2(1.5)),lty=2)

##########################################
#MDSplot with SDE genes of TP
#########################################
set.seed(123)
plotMDS(assay(y.common_cor_batch, "normalized_batch")[rownames(topTreat(fit.treat_batch, coef=7, number=30)), ], col= my.colors[y.common_cor_batch$batch], pch= points[y.common_cor_batch$shortLetterCode],
        main= "MDSplot_TCGA_GSE_normalized_batch_30")
legend("topright", legend= levels(y.common$batch), col=my.colors, pch=1, cex=0.75)
legend("topleft", legend= levels(y.common_cor_batch$shortLetterCode), col= "black", pch= points, cex=0.75)


par(mfrow=c(2,5))

SDE_PT<-y.tcga.gse_cor_batch[,y.common_cor_batch$shortLetterCode!="TR"]
dim(SDE_PT)

## [1] 10225  1001

Type<- factor(y.common_cor_batch$shortLetterCode[y.common_cor_batch$shortLetterCode!="TR"])
for (i in 1:10) {
  plot(SDE_PT[rownames(topTreat(fit.treat_batch,coef=7, n="inf"))[i],]~Type,col= c("black", "blue"),
       xlab= "", ylab= "log2.expression", main= rownames(topTreat(fit.treat_batch,coef=7, n="inf"))[i])
  }

##################################
#pathway analysis (TUMOR VS CTRL)#
##################################
go<- goana(fit.treat_batch, coef=7,geneid = fit.treat_batch$genedetails$gene_id,species= "Hs" )
topGO(go, ontology = "BP") # MITOTIC CELL,G1/S DNArepair  pathways UPREGULATED IN CANCER VS NORMAL

##                                                                 Term Ont    N
## GO:1903047                                mitotic cell cycle process  BP  611
## GO:0000278                                        mitotic cell cycle  BP  716
## GO:0051301                                             cell division  BP  399
## GO:0022402                                        cell cycle process  BP  913
## GO:0007049                                                cell cycle  BP 1233
## GO:0000280                                          nuclear division  BP  268
## GO:0000070                      mitotic sister chromatid segregation  BP  113
## GO:0000819                              sister chromatid segregation  BP  130
## GO:0007059                                    chromosome segregation  BP  201
## GO:0140014                                  mitotic nuclear division  BP  202
## GO:0048285                                         organelle fission  BP  298
## GO:0098813                            nuclear chromosome segregation  BP  159
## GO:0044772                       mitotic cell cycle phase transition  BP  417
## GO:0044770                               cell cycle phase transition  BP  444
## GO:0051276                                   chromosome organization  BP  777
## GO:0051726                                  regulation of cell cycle  BP  822
## GO:0010564                          regulation of cell cycle process  BP  535
## GO:1902850 microtubule cytoskeleton organization involved in mitosis  BP  104
## GO:0007051                                      spindle organization  BP  124
## GO:0071103                                   DNA conformation change  BP  195
##             Up Down         P.Up    P.Down
## GO:1903047  91   34 4.209569e-29 0.8159693
## GO:0000278  99   40 4.306685e-29 0.8272566
## GO:0051301  69   29 7.893316e-26 0.2480253
## GO:0022402 105   47 3.371654e-24 0.9515656
## GO:0007049 124   68 1.546724e-23 0.9118797
## GO:0000280  52   13 6.723441e-22 0.8769150
## GO:0000070  34    2 4.922454e-21 0.9949247
## GO:0000819  36    3 7.465585e-21 0.9907226
## GO:0007059  44    4 8.245322e-21 0.9990917
## GO:0140014  44    9 1.016346e-20 0.9016661
## GO:0048285  53   16 1.742421e-20 0.7919150
## GO:0098813  38    3 1.663878e-19 0.9980519
## GO:0044772  58   19 4.664767e-17 0.9543943
## GO:0044770  59   20 2.137449e-16 0.9633394
## GO:0051276  80   29 2.076509e-15 0.9996679
## GO:0051726  82   41 5.334810e-15 0.9627592
## GO:0010564  62   23 2.420275e-14 0.9854882
## GO:1902850  24    7 2.161967e-12 0.4928457
## GO:0007051  26    6 2.747498e-12 0.8072682
## GO:0071103  32    9 8.970279e-12 0.8786757

#write.csv(topGO(go, ontology = "BP"), file= "topGO.csv")
topGO.PT<- topGO(go, ontology = "BP", truncate.term = 30)

kegg<- kegga(fit.treat_batch, coef=7,geneid = fit.treat_batch$genedetails$gene_id,species= "Hs")
topKEGG(kegg) # cellcycle, dna replication, Diabetic cardiomyopathy, NAFLD upregulated

##                                             Pathway   N Up Down         P.Up
## path:hsa04110                            Cell cycle 104 23    2 1.611804e-11
## path:hsa04610   Complement and coagulation cascades  77  1   16 9.586955e-01
## path:hsa03030                       DNA replication  32  8    0 2.962288e-05
## path:hsa05012                     Parkinson disease 198 20   10 1.453050e-04
## path:hsa05205               Proteoglycans in cancer 181  4   25 9.384746e-01
## path:hsa00190             Oxidative phosphorylation  88 12    1 2.041266e-04
## path:hsa05143               African trypanosomiasis  31  1    8 7.219724e-01
## path:hsa04512              ECM-receptor interaction  76 10    7 9.099854e-04
## path:hsa04923 Regulation of lipolysis in adipocytes  50  0   10 1.000000e+00
## path:hsa05165        Human papillomavirus infection 285 23   21 1.226189e-03
## path:hsa04510                        Focal adhesion 176 10   22 1.748276e-01
## path:hsa01230           Biosynthesis of amino acids  58  8    1 2.155896e-03
## path:hsa05020                         Prion disease 211 18   10 2.221340e-03
## path:hsa00830                    Retinol metabolism  38  0    8 1.000000e+00
## path:hsa00340                  Histidine metabolism  16  0    5 1.000000e+00
## path:hsa04115                 p53 signaling pathway  61  8    5 2.981696e-03
## path:hsa00982     Drug metabolism - cytochrome P450  40  1    8 8.084109e-01
## path:hsa04114                        Oocyte meiosis  93 10    8 4.197265e-03
## path:hsa05130 Pathogenic Escherichia coli infection 156 14   10 4.230349e-03
## path:hsa00330       Arginine and proline metabolism  34  1    7 7.544169e-01
##                     P.Down
## path:hsa04110 9.914401e-01
## path:hsa04610 2.069782e-05
## path:hsa03030 1.000000e+00
## path:hsa05012 8.150140e-01
## path:hsa05205 1.872063e-04
## path:hsa00190 9.969609e-01
## path:hsa05143 5.387913e-04
## path:hsa04512 2.063235e-01
## path:hsa04923 1.003968e-03
## path:hsa05165 2.689900e-01
## path:hsa04510 1.720395e-03
## path:hsa01230 9.779489e-01
## path:hsa05020 8.694962e-01
## path:hsa00830 2.253225e-03
## path:hsa00340 2.461951e-03
## path:hsa04115 3.451099e-01
## path:hsa00982 3.167943e-03
## path:hsa04114 2.370325e-01
## path:hsa05130 5.342913e-01
## path:hsa00330 4.794405e-03

#write.csv(topKEGG(kegg), file= "topKEGG.csv")
topkegg.PT<- topKEGG(kegg, truncate.path = 30)

################################################
#Significant differential genes; Tumor vs Normal
################################################

differentailgenes<- rownames(decideTests(fit.treat_batch[,7]))[decideTests(fit.treat_batch[,7])!=0] 
length(differentailgenes)

## [1] 1062

y.common_surv<- y.common_cor_batch[c(differentailgenes),  ] #  1062 de genes and 1016 samples and 
y.common_surv<-y.common_surv[,!is.na(y.common_surv$time)] # 982 samples
y.common_surv<-y.common_surv[,y.common_surv$shortLetterCode!="NT"] # excluding normal samples
y.common_surv$shortLetterCode<- factor(y.common_surv$shortLetterCode)
dim(y.common_surv) #1062 genes and 982 samples

## [1] 1062  982

y.common_surv$vital_status <- factor(y.common_surv$vital_status)

table(y.common_surv$shortLetterCode) #967 primary and 15 recurrent tumor with survival time

## 
##  TP  TR 
## 967  15

######################################
#Splitting and merging Pheno and counts data#
#####################################
OV.PhenoData<- data.frame(colData(y.common_surv))
OV.Counts<- assay(y.common_surv, "normalized_batch")
OV.Counts<- OV.Counts[order(rowSums(OV.Counts), decreasing= T),]
head(OV.Counts)[, 1:5]

##        TCGA-29-1692-01B-01R-0808-01 TCGA-13-1488-01A-01R-0477-01
## GAPDH                      12.58758                     12.68406
## TUBA1B                     11.80367                     11.78490
## TUBA1C                     11.75346                     11.91617
## RPL18A                     11.47641                     10.88576
## COX6C                      11.08334                     11.63995
## WFDC2                      10.26358                     10.48439
##        TCGA-13-1483-01A-01R-0477-01 TCGA-24-2024-01A-02R-0668-01
## GAPDH                      12.58758                     12.49987
## TUBA1B                     11.16848                     11.62928
## TUBA1C                     11.15961                     11.55965
## RPL18A                     11.18119                     11.55521
## COX6C                      11.21686                     10.57146
## WFDC2                      10.72970                     10.58379
##        TCGA-13-0908-01B-01R-0453-01
## GAPDH                      12.46622
## TUBA1B                     11.47308
## TUBA1C                     11.57144
## RPL18A                     10.83822
## COX6C                      12.06827
## WFDC2                      10.22433

d<- duplicated(rownames(OV.Counts))
table(d)

## d
## FALSE 
##  1062

OV.Counts<- OV.Counts[!d,]

covariates<- rownames(OV.Counts)
OV_TCGA_GSE<-cbind.data.frame(OV.PhenoData,t(OV.Counts))
colnames(OV_TCGA_GSE) # c1 to 7 is PhenoData

##    [1] "shortLetterCode"   "description"       "time"             
##    [4] "vital_status"      "batch"             "correlation"      
##    [7] "correlation_batch" "GAPDH"             "TUBA1B"           
##   [10] "TUBA1C"            "RPL18A"            "COX6C"            
##   [13] "WFDC2"             "TUBB"              "COX6A1"           
##   [16] "VIM"               "ASS1"              "SERF2"            
##   [19] "COL1A1"            "C3"                "COX5B"            
##   [22] "TPI1"              "LGALS3"            "TIMP1"            
##   [25] "COX7B"             "SPINT2"            "S100A10"          
##   [28] "CAPN2"             "HDGF"              "SPP1"             
##   [31] "MARCKSL1"          "MUC1"              "SPON1"            
##   [34] "TXNIP"             "UCP2"              "IFI27"            
##   [37] "PNN"               "UQCRQ"             "PFN1"             
##   [40] "SNRPB"             "MT1X"              "PEA15"            
##   [43] "SDCBP"             "NPTN"              "NCOA4"            
##   [46] "TMED10"            "TAGLN2"            "BANF1"            
##   [49] "TACSTD2"           "GPI"               "PRKCI"            
##   [52] "PSMB2"             "GSTP1"             "S100A13"          
##   [55] "C19orf53"          "FOLR1"             "WT1"              
##   [58] "KRT8"              "CCT5"              "S100A11"          
##   [61] "DCN"               "PUF60"             "C1S"              
##   [64] "NDUFB11"           "KPNA2"             "IGFBP2"           
##   [67] "ASAH1"             "CYC1"              "ST13"             
##   [70] "CRABP2"            "CALD1"             "BZW2"             
##   [73] "ISG15"             "PAX8"              "JUN"              
##   [76] "FAM120A"           "MT1G"              "BTG3"             
##   [79] "LTA4H"             "ANXA5"             "SFN"              
##   [82] "PNRC2"             "S100A4"            "PRKAR1A"          
##   [85] "COL4A2"            "PRKCSH"            "CYFIP1"           
##   [88] "PTTG1"             "FKBP1A"            "YWHAE"            
##   [91] "TPM1"              "MTHFD2"            "COL4A1"           
##   [94] "TPD52L1"           "RPN1"              "SERPING1"         
##   [97] "ATP6V0B"           "MRPL15"            "AURKAIP1"         
##  [100] "HTRA1"             "CKS1B"             "AHCY"             
##  [103] "SH3BGRL"           "UGP2"              "PDGFC"            
##  [106] "GANAB"             "NFE2L2"            "CXCR4"            
##  [109] "SHMT2"             "HYOU1"             "CIRBP"            
##  [112] "OAT"               "RCN2"              "POLR2H"           
##  [115] "IFI16"             "CKS2"              "PRMT1"            
##  [118] "SERPINH1"          "VEGFA"             "CLDN3"            
##  [121] "FKBP4"             "LSM4"              "TCEAL4"           
##  [124] "TRIM28"            "SPARCL1"           "CP"               
##  [127] "SYNGR2"            "LCN2"              "OGT"              
##  [130] "CALR"              "GJA1"              "B4GALT5"          
##  [133] "MDK"               "YPEL5"             "SOX17"            
##  [136] "SLC34A2"           "SLC39A6"           "JUP"              
##  [139] "ALDH2"             "UBE2C"             "CTSA"             
##  [142] "PAIP1"             "ALDH9A1"           "RBM5"             
##  [145] "BCAM"              "HNRNPA0"           "EIF6"             
##  [148] "MLF2"              "ANXA4"             "CFI"              
##  [151] "TSPAN13"           "SCGB2A1"           "CALU"             
##  [154] "JAK1"              "MCM6"              "LMBRD1"           
##  [157] "ELF3"              "TOB1"              "SMC4"             
##  [160] "HMGA1"             "IDH2"              "LRRC8D"           
##  [163] "USO1"              "PFKP"              "TYMS"             
##  [166] "KLK7"              "ELOVL5"            "ARPC5L"           
##  [169] "PODXL"             "PDCD4"             "TUBB3"            
##  [172] "FIBP"              "SNRPC"             "NR2F6"            
##  [175] "PPP4C"             "ABHD11"            "NDUFS8"           
##  [178] "PDIA4"             "LRIG1"             "SPTBN1"           
##  [181] "MRPS15"            "GMPS"              "HEXB"             
##  [184] "TPD52L2"           "TSPYL1"            "TES"              
##  [187] "SRRM1"             "MMP7"              "PPIF"             
##  [190] "COPE"              "GAS1"              "KRT7"             
##  [193] "MCM2"              "TMEM9B"            "ROCK1"            
##  [196] "TNFAIP2"           "MYLK"              "HEBP1"            
##  [199] "KLK6"              "TPP1"              "WSB1"             
##  [202] "KLK8"              "PLS3"              "FBXO9"            
##  [205] "PRNP"              "RFC4"              "SMARCC1"          
##  [208] "METTL7A"           "NUSAP1"            "IFI6"             
##  [211] "GNG12"             "ARF5"              "SLC2A1"           
##  [214] "CAST"              "ZWINT"             "DPYSL2"           
##  [217] "IL1R1"             "SERINC1"           "LIPA"             
##  [220] "CRIP2"             "EIF4G1"            "IK"               
##  [223] "RAB31"             "PDLIM5"            "HBB"              
##  [226] "IGBP1"             "IL6ST"             "ATP6V1D"          
##  [229] "HSD17B4"           "GPRC5B"            "FEZ2"             
##  [232] "RNF103"            "SELENBP1"          "CORO1B"           
##  [235] "LSR"               "CLDN7"             "RUSC1"            
##  [238] "CAB39"             "FAM8A1"            "BACE2"            
##  [241] "RALA"              "TOR1AIP1"          "RRBP1"            
##  [244] "SLC39A4"           "ZBED5"             "DCTD"             
##  [247] "RRM2"              "EXOSC4"            "SH3BP5"           
##  [250] "PSAT1"             "RFXANK"            "NUCB1"            
##  [253] "FBXL5"             "SF3B4"             "MCM3"             
##  [256] "TPBG"              "ECHDC2"            "PPP1R14B"         
##  [259] "MYO10"             "GIPC1"             "TJP2"             
##  [262] "PFDN4"             "RASA1"             "ST14"             
##  [265] "RDX"               "SORT1"             "FXYD3"            
##  [268] "ADRM1"             "THY1"              "MFAP2"            
##  [271] "HGS"               "CCNB2"             "SEC24C"           
##  [274] "KLHDC3"            "MRPL12"            "KLF10"            
##  [277] "HMGB3"             "NR3C1"             "TPX2"             
##  [280] "NUTF2"             "CREBL2"            "TACC1"            
##  [283] "DLG5"              "MGP"               "NBR1"             
##  [286] "SERPINB6"          "UBAP2L"            "RGS1"             
##  [289] "DBN1"              "MCM7"              "RGL1"             
##  [292] "RBL2"              "GTF2IRD1"          "TST"              
##  [295] "PREPL"             "SCRIB"             "BCL2L1"           
##  [298] "HBA2"              "PCCB"              "CD14"             
##  [301] "LIMA1"             "CREB3L2"           "ILVBL"            
##  [304] "ATG4B"             "TERF2IP"           "MAL"              
##  [307] "TRIM22"            "MAP1LC3B"          "PPP3CB"           
##  [310] "CDKN2A"            "CENPF"             "CALCOCO2"         
##  [313] "IMPA1"             "POP7"              "SPG11"            
##  [316] "CAPG"              "ACAA2"             "CCNE1"            
##  [319] "CRYZ"              "MX2"               "CLIC5"            
##  [322] "MRPL13"            "PLD3"              "ALG3"             
##  [325] "AGPAT2"            "TUBA4A"            "BCL7C"            
##  [328] "MRPL4"             "FKBP11"            "DDX49"            
##  [331] "FHL1"              "CAV1"              "PMP22"            
##  [334] "LANCL1"            "AFF1"              "STIP1"            
##  [337] "RALY"              "ADAM9"             "CDC20"            
##  [340] "MELK"              "ATP6V0E2"          "TPM2"             
##  [343] "DNAJB9"            "EHF"               "TOP2A"            
##  [346] "MICAL2"            "UNC119B"           "CCNB1"            
##  [349] "FUT8"              "HEG1"              "SLTM"             
##  [352] "UBL3"              "DDX11"             "PURA"             
##  [355] "LAGE3"             "S100A2"            "RACGAP1"          
##  [358] "SORD"              "HSD17B11"          "ISG20L2"          
##  [361] "MPHOSPH6"          "EBP"               "VWF"              
##  [364] "ELOVL1"            "P4HA2"             "PKIG"             
##  [367] "PNO1"              "NME7"              "PRSS8"            
##  [370] "TSPYL4"            "STX12"             "EHBP1"            
##  [373] "CTNNAL1"           "S100A1"            "FTO"              
##  [376] "CDH6"              "INPPL1"            "PSME4"            
##  [379] "S100A14"           "TGFBR2"            "UBE2S"            
##  [382] "MMP11"             "KRT17"             "BDH2"             
##  [385] "ACTL6A"            "TXNDC15"           "PRKDC"            
##  [388] "TMOD1"             "NDUFB7"            "ISOC1"            
##  [391] "SEC24B"            "KIF2C"             "PARVA"            
##  [394] "LXN"               "CHD9"              "LSM2"             
##  [397] "SKIL"              "VPS13C"            "RNF38"            
##  [400] "MAZ"               "NEU1"              "BGN"              
##  [403] "RAP1GAP"           "TRIP13"            "INHBB"            
##  [406] "RGS5"              "CDCA4"             "GOLM1"            
##  [409] "ZBTB20"            "CD44"              "RNASEH2A"         
##  [412] "EIF4EBP1"          "MST1"              "TNFRSF12A"        
##  [415] "DEFB1"             "SLC43A3"           "SOX9"             
##  [418] "ITGB8"             "PRKACB"            "CFDP1"            
##  [421] "SNX7"              "BBS1"              "SLC25A11"         
##  [424] "RHOT1"             "NEK9"              "BUB1B"            
##  [427] "EPB41L4B"          "CERK"              "DAB2"             
##  [430] "GRINA"             "SNRK"              "RRAS2"            
##  [433] "PAFAH1B3"          "MTSS1"             "PRPS1"            
##  [436] "FNDC3A"            "ZNF395"            "ECT2"             
##  [439] "CPE"               "PRC1"              "FARSA"            
##  [442] "PUS7"              "SGCE"              "DHRS7"            
##  [445] "ANXA3"             "GINS1"             "CLDN4"            
##  [448] "MRPS12"            "MAD2L1"            "ZNF93"            
##  [451] "SLC2A4RG"          "SLC5A6"            "SMG7"             
##  [454] "PRAME"             "RANGAP1"           "GCC2"             
##  [457] "MFN1"              "RBM38"             "APOA1"            
##  [460] "DAPK1"             "RGS2"              "NMU"              
##  [463] "EPB41L2"           "TIAM1"             "PDGFRA"           
##  [466] "ZNF593"            "SEMA3C"            "TMPRSS4"          
##  [469] "CDCA8"             "KLK5"              "LEPROTL1"         
##  [472] "MAP7D1"            "AURKA"             "TCEAL1"           
##  [475] "S100A8"            "PKP3"              "SYNE2"            
##  [478] "ACOT9"             "KIFC1"             "CRIM1"            
##  [481] "GTF2H1"            "HSDL2"             "HNMT"             
##  [484] "MAOB"              "ESPL1"             "TCTN1"            
##  [487] "JAG2"              "POR"               "RBM6"             
##  [490] "TBC1D9"            "GALNT6"            "EFEMP1"           
##  [493] "FBXW11"            "OSBPL1A"           "PTP4A3"           
##  [496] "MCM4"              "LGALS8"            "TARBP2"           
##  [499] "VCAM1"             "MTUS1"             "YKT6"             
##  [502] "FGF18"             "AKAP11"            "NOTCH3"           
##  [505] "AUH"               "ZNF330"            "TMEM47"           
##  [508] "GGH"               "TGFBR3"            "RARRES1"          
##  [511] "TNNT1"             "RAPGEF2"           "PER2"             
##  [514] "CLDN15"            "GLDC"              "PJA2"             
##  [517] "ATF1"              "ANKMY2"            "ASF1B"            
##  [520] "DTYMK"             "GLS"               "HSF1"             
##  [523] "SST"               "NUP210"            "CETN3"            
##  [526] "TK1"               "FJX1"              "SRD5A1"           
##  [529] "VSIG4"             "OVOL2"             "KIF20A"           
##  [532] "SMARCA2"           "RCHY1"             "FASN"             
##  [535] "RBPMS"             "SAC3D1"            "CTSO"             
##  [538] "NCAPD3"            "TM7SF2"            "CPVL"             
##  [541] "BIRC5"             "NT5DC2"            "TTC28"            
##  [544] "GSTM3"             "RAB11FIP2"         "HMMR"             
##  [547] "FKBP8"             "CCDC85B"           "TRAK2"            
##  [550] "RABGAP1L"          "FBXW2"             "PYCR1"            
##  [553] "NFIC"              "OVGP1"             "CCND2"            
##  [556] "TBC1D2B"           "PLSCR4"            "STXBP1"           
##  [559] "SDC1"              "HP"                "LAD1"             
##  [562] "SDC2"              "GATA6"             "PCDH7"            
##  [565] "FLAD1"             "SLK"               "EPS8"             
##  [568] "CLK1"              "MEIS2"             "GRHL2"            
##  [571] "CCDC28A"           "ASPM"              "DSE"              
##  [574] "GCA"               "EFHC1"             "PIK3R1"           
##  [577] "WDR37"             "BNC2"              "CEP55"            
##  [580] "NCAPD2"            "AMIGO2"            "LAMP3"            
##  [583] "CENPA"             "TRAF5"             "UCK2"             
##  [586] "MRPL2"             "SLIT2"             "VWA1"             
##  [589] "NEK2"              "KLF2"              "LOXL2"            
##  [592] "LMNB2"             "SETX"              "PLCB1"            
##  [595] "G0S2"              "FZD7"              "PTGIS"            
##  [598] "NDN"               "TNFAIP8"           "TROAP"            
##  [601] "TIMELESS"          "SATB1"             "TBRG4"            
##  [604] "EMP3"              "HERC1"             "FOXM1"            
##  [607] "BTAF1"             "DDHD2"             "ZBTB1"            
##  [610] "CLDN1"             "C7"                "ING3"             
##  [613] "RCBTB2"            "RAP2C"             "SCG5"             
##  [616] "TFAP2A"            "COBL"              "CDKN3"            
##  [619] "TSC1"              "HOXC6"             "SLC39A8"          
##  [622] "TRIB3"             "ZDHHC13"           "ST3GAL5"          
##  [625] "ATXN1"             "ATAD2"             "WNT5A"            
##  [628] "ERLIN2"            "INPP1"             "PRSS21"           
##  [631] "KIAA1109"          "TTK"               "ARMCX1"           
##  [634] "WDR19"             "ZDHHC17"           "HMGA2"            
##  [637] "ARHGEF10"          "MPDZ"              "MLXIP"            
##  [640] "PDZD2"             "RPL39L"            "MDFIC"            
##  [643] "LZTFL1"            "GLIPR1"            "LRRN2"            
##  [646] "PKN1"              "TFPI2"             "COL4A5"           
##  [649] "PAPSS2"            "INSR"              "GNG11"            
##  [652] "FRMD4B"            "ZWILCH"            "MPZL2"            
##  [655] "RASAL1"            "MYBL2"             "VPS13B"           
##  [658] "CDCA3"             "FAS"               "EFNB3"            
##  [661] "TACC3"             "CEBPG"             "COL9A2"           
##  [664] "S100PBP"           "B3GAT3"            "MKI67"            
##  [667] "HSPA2"             "RAD51AP1"          "PSRC1"            
##  [670] "GPSM2"             "CHODL"             "HERC2"            
##  [673] "STX2"              "FANCI"             "SECISBP2"         
##  [676] "PITPNC1"           "DDR2"              "BOP1"             
##  [679] "DHX29"             "PYGL"              "GALC"             
##  [682] "PKP2"              "SESN1"             "MBP"              
##  [685] "USP18"             "CFH"               "KIF13B"           
##  [688] "IGFBP6"            "CBX7"              "LAMB1"            
##  [691] "FYCO1"             "SPAG9"             "HTR3A"            
##  [694] "C3AR1"             "BLM"               "MAP3K5"           
##  [697] "STIL"              "BEX1"              "FZD1"             
##  [700] "OASL"              "MAF"               "SFMBT1"           
##  [703] "UPK1B"             "PDLIM7"            "PAK1IP1"          
##  [706] "GINS2"             "HSPA12A"           "ANKRA2"           
##  [709] "PBK"               "PRKCA"             "C6orf120"         
##  [712] "LMNB1"             "C14orf132"         "PDGFD"            
##  [715] "SORBS2"            "CDC42EP3"          "AGA"              
##  [718] "ETS2"              "TBC1D4"            "LIMCH1"           
##  [721] "ARMC8"             "TRIM2"             "ERCC5"            
##  [724] "KLF4"              "SVEP1"             "XPA"              
##  [727] "PRKAR2B"           "PBX3"              "ITCH"             
##  [730] "RECQL4"            "BTN3A3"            "RAB11FIP3"        
##  [733] "LRP8"              "PALMD"             "LRFN4"            
##  [736] "TNKS"              "PROS1"             "FLRT2"            
##  [739] "MAP4K1"            "KIF4A"             "SEMA5A"           
##  [742] "SNAI2"             "BAK1"              "PDLIM3"           
##  [745] "DTL"               "POLQ"              "AQP9"             
##  [748] "CCNA2"             "IFT88"             "GALNT12"          
##  [751] "WASF3"             "NF1"               "FBXL7"            
##  [754] "PDGFRL"            "SFRP1"             "EDNRA"            
##  [757] "SERPINB9"          "CDC25A"            "ID3"              
##  [760] "CBR4"              "GPR137B"           "PEX12"            
##  [763] "DOCK4"             "HSPB8"             "MAOA"             
##  [766] "MAN1A1"            "ELOVL6"            "SIRPA"            
##  [769] "TLE4"              "MBNL3"             "NCAPH"            
##  [772] "SPAG5"             "AMPD3"             "CAV2"             
##  [775] "ATF7IP2"           "AHI1"              "CRYL1"            
##  [778] "UPK3B"             "ATP6V1B1"          "PEG3"             
##  [781] "EZH2"              "KLF11"             "LAMA4"            
##  [784] "GATM"              "CACNA1A"           "RAB8B"            
##  [787] "DIAPH2"            "ZNF211"            "RGS4"             
##  [790] "RPS6KA2"           "KIF11"             "DNMT3B"           
##  [793] "POLE2"             "GSPT2"             "IRS1"             
##  [796] "FZD10"             "CADPS2"            "RAD51"            
##  [799] "SAMD4A"            "MEF2C"             "CENPN"            
##  [802] "COLEC12"           "FGL2"              "MEGF9"            
##  [805] "FBP1"              "WDR7"              "VLDLR"            
##  [808] "KIF22"             "RAB38"             "SLC46A3"          
##  [811] "TBP"               "RAD54L"            "ZFPM2"            
##  [814] "ALDH1A3"           "SLC31A2"           "BMP4"             
##  [817] "PPP1R12B"          "FBXO3"             "FLNC"             
##  [820] "COMP"              "EPHB6"             "PCDH17"           
##  [823] "CX3CR1"            "DTNA"              "CDT1"             
##  [826] "CFD"               "NELL2"             "BACH1"            
##  [829] "NDC80"             "INSIG2"            "NAT1"             
##  [832] "YTHDC2"            "PLA2G2A"           "SNCA"             
##  [835] "SULT1C2"           "AGR2"              "ZNF175"           
##  [838] "BUB1"              "KCNJ2"             "PPWD1"            
##  [841] "ADCY9"             "PKD2"              "BBS4"             
##  [844] "CDC7"              "DPYD"              "FGF9"             
##  [847] "KIF14"             "WDR47"             "NT5E"             
##  [850] "USP12"             "PIGB"              "SOBP"             
##  [853] "GALNT10"           "RHOBTB1"           "AKT3"             
##  [856] "ECM2"              "NR2F1"             "PTPRE"            
##  [859] "L1CAM"             "GNAI1"             "PPP1R3C"          
##  [862] "GRAMD1C"           "EXO1"              "MITF"             
##  [865] "PCCA"              "PPM1D"             "OLFML1"           
##  [868] "PTH2R"             "DYNC2H1"           "TRIM68"           
##  [871] "SNCAIP"            "CDON"              "FRAS1"            
##  [874] "CLIP4"             "CAND2"             "GFPT2"            
##  [877] "WDR44"             "REEP1"             "C4BPA"            
##  [880] "ALDH1A2"           "HDAC4"             "CCDC93"           
##  [883] "BAMBI"             "FLRT3"             "SPOCK1"           
##  [886] "CENPE"             "RBMS3"             "KIF15"            
##  [889] "FCGR3B"            "TRO"               "CASD1"            
##  [892] "SEMA6D"            "ZNF45"             "E2F8"             
##  [895] "DEPDC1"            "AOX1"              "GCNT1"            
##  [898] "PTGER4"            "MCM10"             "OIP5"             
##  [901] "DPY19L1"           "PTCH1"             "PLCL2"            
##  [904] "THRA"              "ME1"               "CHEK1"            
##  [907] "EPHX2"             "ITGAM"             "RNASE4"           
##  [910] "ATP10D"            "ZEB1"              "TSPAN8"           
##  [913] "CALB2"             "PDK4"              "NCAPG"            
##  [916] "EVI5"              "LRRC49"            "ZNF226"           
##  [919] "FBXW7"             "PSD3"              "MCTP2"            
##  [922] "HSD11B1"           "TSPAN5"            "ALDH1A1"          
##  [925] "DIXDC1"            "ULK2"              "TBC1D12"          
##  [928] "NAV3"              "HOXA5"             "PDPN"             
##  [931] "LYVE1"             "TRPC1"             "GNE"              
##  [934] "KCNJ8"             "THBD"              "FGF13"            
##  [937] "MCOLN3"            "RORA"              "MNDA"             
##  [940] "MARCO"             "NRXN3"             "KIF23"            
##  [943] "ZNF671"            "ANG"               "OSTM1"            
##  [946] "CSF2RB"            "TNXB"              "DOCK5"            
##  [949] "FGF1"              "CYP3A5"            "PLAC8"            
##  [952] "GALK2"             "GREB1"             "KDR"              
##  [955] "PRKD1"             "ZNF573"            "CAPN3"            
##  [958] "WNT2B"             "GPRASP1"           "PACRG"            
##  [961] "TCEAL2"            "CPEB1"             "SYNE1"            
##  [964] "ABCA5"             "TCF21"             "FRY"              
##  [967] "LAMA2"             "CORO2B"            "DMD"              
##  [970] "SMPDL3A"           "PTGER3"            "MAP3K8"           
##  [973] "PLA2G4A"           "SLC30A4"           "SOCS2"            
##  [976] "DIRAS3"            "PCSK5"             "PLCE1"            
##  [979] "NLGN4X"            "KLRB1"             "ABCG2"            
##  [982] "PDE8B"             "PGR"               "RAB27B"           
##  [985] "KCNB1"             "EFCAB1"            "IL18"             
##  [988] "HBD"               "ZBTB16"            "PROCR"            
##  [991] "PTPN13"            "PDE7B"             "RTN1"             
##  [994] "SERPINA6"          "P2RX7"             "DNAH9"            
##  [997] "SPC25"             "NR3C2"             "VNN2"             
## [1000] "NAP1L3"            "GSTA3"             "EPB41L3"          
## [1003] "C6"                "ADH1B"             "HAS1"             
## [1006] "EPB42"             "GPM6A"             "ADRB2"            
## [1009] "C21orf62"          "CLEC4M"            "FCN1"             
## [1012] "BMP2"              "CYP39A1"           "TFPI"             
## [1015] "NANOG"             "BNC1"              "DCLK1"            
## [1018] "HSD17B6"           "HLF"               "LGALS2"           
## [1021] "ABCA8"             "PIP5K1B"           "PCDH9"            
## [1024] "NXF3"              "SGCG"              "CLEC10A"          
## [1027] "DSC3"              "CCDC81"            "PCLO"             
## [1030] "KBTBD11"           "GHR"               "SLITRK5"          
## [1033] "PID1"              "ARHGAP6"           "NBEA"             
## [1036] "SCTR"              "CCDC69"            "VGLL3"            
## [1039] "OGN"               "ADAMTS3"           "CXCL6"            
## [1042] "LHX2"              "CHGB"              "SPAG6"            
## [1045] "NAP1L2"            "SOX3"              "NR0B1"            
## [1048] "DNAI2"             "AGBL2"             "OMD"              
## [1051] "PCOLCE2"           "COL21A1"           "HSD17B2"          
## [1054] "ASTN1"             "CRISP3"            "ADH1C"            
## [1057] "BCHE"              "RNF128"            "DPYS"             
## [1060] "RUNDC3B"           "ABCB1"             "NPY1R"            
## [1063] "IL7"               "PRG4"              "PTGDR"            
## [1066] "GIPC2"             "CRISP2"            "PTPRZ1"           
## [1069] "MEOX2"

head(OV_TCGA_GSE)[, 7:12]

##                              correlation_batch    GAPDH   TUBA1B   TUBA1C
## TCGA-29-1692-01B-01R-0808-01         1.0000000 12.58758 11.80367 11.75346
## TCGA-13-1488-01A-01R-0477-01         0.9141113 12.68406 11.78490 11.91617
## TCGA-13-1483-01A-01R-0477-01         0.9194885 12.58758 11.16848 11.15961
## TCGA-24-2024-01A-02R-0668-01         0.9145925 12.49987 11.62928 11.55965
## TCGA-13-0908-01B-01R-0453-01         0.9069363 12.46622 11.47308 11.57144
## TCGA-04-1341-01A-01R-0434-01         0.8990117 12.75124 11.90759 12.04805
##                                RPL18A    COX6C
## TCGA-29-1692-01B-01R-0808-01 11.47641 11.08334
## TCGA-13-1488-01A-01R-0477-01 10.88576 11.63995
## TCGA-13-1483-01A-01R-0477-01 11.18119 11.21686
## TCGA-24-2024-01A-02R-0668-01 11.55521 10.57146
## TCGA-13-0908-01B-01R-0453-01 10.83822 12.06827
## TCGA-04-1341-01A-01R-0434-01 11.65504 10.84748

covariates<- gsub("-", ".",covariates)

colnames(OV_TCGA_GSE)[-c(1:7)]<- gsub("-", ".",colnames(OV_TCGA_GSE)[-c(1:7)])

#############################
# Univariate Cox regression##
#############################

univ_formulas <- sapply(covariates,
                        function(x) as.formula(paste('Surv(OV_TCGA_GSE$time, OV_TCGA_GSE$vital_status=="Dead")~', x)))
univ_models <- lapply( univ_formulas, function(x){coxph(x, data = OV_TCGA_GSE)})

## Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
## Loglik converged before variable 1 ; coefficient may be infinite.

## Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
## Loglik converged before variable 1 ; coefficient may be infinite.

cox<- summary(univ_models)
# Extract data 
univ_results <- lapply(univ_models,
                       function(x){ 
                         x <- summary(x)
                         p.value<-signif(x$wald["pvalue"], digits=2)
                         wald.test<-signif(x$wald["test"], digits=2)
                         beta<-signif(x$coef[1], digits=2);#coeficient beta
                         HR <-signif(x$coef[2], digits=2);#exp(beta)
                         HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
                         HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
                         HR <- paste0(HR, " (", 
                                      HR.confint.lower, "-", HR.confint.upper, ")")
                         res<-c(beta, HR, wald.test, p.value)
                         names(res)<-c("beta", "HR (95% CI for HR)", "wald.test", 
                                       "p.value")
                         return(res)
                         #return(exp(cbind(coef(x),confint(x))))
                       })

res <- t(as.data.frame(univ_results, check.names = FALSE))
res<- data.frame(res)
res$p.value<- as.numeric(res$p.value)
res<- res[order(res$p.value, decreasing = F),]
res_pvalue_TCGA_GSE<-res[res$p.value<0.05,]
str(res_pvalue_TCGA_GSE)

## 'data.frame':    237 obs. of  4 variables:
##  $ beta              : chr  "0.33" "0.28" "0.4" "0.18" ...
##  $ HR..95..CI.for.HR.: chr  "1.4 (1.2-1.6)" "1.3 (1.2-1.5)" "1.5 (1.3-1.8)" "1.2 (1.1-1.3)" ...
##  $ wald.test         : chr  "31" "23" "22" "22" ...
##  $ p.value           : num  2.4e-08 1.6e-06 2.5e-06 2.8e-06 6.4e-06 1.0e-05 1.9e-05 3.1e-05 3.9e-05 4.1e-05 ...

res_pvalue_TCGA_GSE$wald.test<- as.numeric(res_pvalue_TCGA_GSE$wald.test)
res_pvalue_TCGA_GSE$HR<- res_pvalue_TCGA_GSE$HR..95..CI.for.HR.
res_pvalue_TCGA_GSE$HR<- gsub("[(].*", "",res_pvalue_TCGA_GSE$HR)
res_pvalue_TCGA_GSE$HR<- as.numeric(res_pvalue_TCGA_GSE$HR)
res_pvalue_TCGA_GSE_HR<- res_pvalue_TCGA_GSE[(res_pvalue_TCGA_GSE$HR>1.1 | res_pvalue_TCGA_GSE$HR<0.9), ]
 

############################################################################
#Univariate significant genes obtained from univ analysis of cox regression#
############################################################################

KMgenes_TCGA_GSE<- rownames(res_pvalue_TCGA_GSE_HR)
KMgenes_TCGA_GSE # Univariates significant genes obtained from 5 datasets

##   [1] "GALNT10"  "FZD1"     "RHOT1"    "EDNRA"    "RPS6KA2"  "GFPT2"   
##   [7] "ZEB1"     "ASAH1"    "ECM2"     "BTN3A3"   "PDGFRA"   "PMP22"   
##  [13] "PDPN"     "GALNT6"   "WT1"      "PDIA4"    "PDLIM3"   "DAB2"    
##  [19] "PTGER3"   "OMD"      "SVEP1"    "FJX1"     "GSTP1"    "PPP1R12B"
##  [25] "NUCB1"    "CPEB1"    "KLF2"     "LYVE1"    "SNRPC"    "DSE"     
##  [31] "MCM3"     "CERK"     "PCDH9"    "ARF5"     "EXOSC4"   "STXBP1"  
##  [37] "STX12"    "MRPL4"    "MBNL3"    "LOXL2"    "FLNC"     "GNG12"   
##  [43] "RBMS3"    "PIK3R1"   "KIF22"    "MITF"     "FANCI"    "SHMT2"   
##  [49] "OLFML1"   "FUT8"     "MTSS1"    "ZNF330"   "SYNGR2"   "PTGDR"   
##  [55] "USP18"    "MEOX2"    "PUF60"    "FAM8A1"   "DHRS7"    "HSD17B6" 
##  [61] "PDLIM5"   "BACE2"    "GREB1"    "DDR2"     "PKD2"     "RASA1"   
##  [67] "B4GALT5"  "PFN1"     "DDHD2"    "UCP2"     "BMP4"     "MRPL12"  
##  [73] "CBX7"     "UBL3"     "SCTR"     "TJP2"     "P2RX7"    "AKT3"    
##  [79] "GNE"      "S100A13"  "GPRC5B"   "GRAMD1C"  "RPL18A"   "HMGB3"   
##  [85] "TIAM1"    "TMED10"   "OSTM1"    "CTSA"     "ADRB2"    "BMP2"    
##  [91] "CCT5"     "CCDC85B"  "VGLL3"    "THBD"     "PDLIM7"   "DIAPH2"  
##  [97] "FARSA"    "ST14"     "PYCR1"    "CAND2"    "EPB41L3"  "SERPINB6"
## [103] "BCL7C"    "RALA"     "OVOL2"    "GINS2"    "PPP4C"    "NXF3"    
## [109] "RFXANK"   "SAC3D1"   "ARHGEF10" "SNRK"     "GCNT1"    "RNF38"   
## [115] "FGF1"     "BAK1"     "INPP1"    "MCTP2"    "LSM4"     "TMEM9B"  
## [121] "NF1"      "CALR"

dim(OV_TCGA_GSE) #982 samples and 1069 genes

## [1]  982 1069

# From here start!!!!
head(OV_TCGA_GSE)[, 1:10]

##                              shortLetterCode                    description
## TCGA-29-1692-01B-01R-0808-01              TP Serous cystadenocarcinoma, NOS
## TCGA-13-1488-01A-01R-0477-01              TP Serous cystadenocarcinoma, NOS
## TCGA-13-1483-01A-01R-0477-01              TP Serous cystadenocarcinoma, NOS
## TCGA-24-2024-01A-02R-0668-01              TP Serous cystadenocarcinoma, NOS
## TCGA-13-0908-01B-01R-0453-01              TP Serous cystadenocarcinoma, NOS
## TCGA-04-1341-01A-01R-0434-01              TP Serous cystadenocarcinoma, NOS
##                              time vital_status batch correlation
## TCGA-29-1692-01B-01R-0808-01 2872         Dead  TCGA   1.0000000
## TCGA-13-1488-01A-01R-0477-01 2154         Dead  TCGA   0.9237833
## TCGA-13-1483-01A-01R-0477-01  895         Dead  TCGA   0.9287817
## TCGA-24-2024-01A-02R-0668-01 1769         Dead  TCGA   0.9244550
## TCGA-13-0908-01B-01R-0453-01 1843         Dead  TCGA   0.9168311
## TCGA-04-1341-01A-01R-0434-01   33        Alive  TCGA   0.9105627
##                              correlation_batch    GAPDH   TUBA1B   TUBA1C
## TCGA-29-1692-01B-01R-0808-01         1.0000000 12.58758 11.80367 11.75346
## TCGA-13-1488-01A-01R-0477-01         0.9141113 12.68406 11.78490 11.91617
## TCGA-13-1483-01A-01R-0477-01         0.9194885 12.58758 11.16848 11.15961
## TCGA-24-2024-01A-02R-0668-01         0.9145925 12.49987 11.62928 11.55965
## TCGA-13-0908-01B-01R-0453-01         0.9069363 12.46622 11.47308 11.57144
## TCGA-04-1341-01A-01R-0434-01         0.8990117 12.75124 11.90759 12.04805

set.seed(123) # set.seed(123). This is used in the paper 

trainingdata.sample<- sample(rownames(OV_TCGA_GSE), size=round(length(rownames(OV_TCGA_GSE))*0.5,0)) # training sample
testdata.sample<- rownames(OV_TCGA_GSE)[!rownames(OV_TCGA_GSE)%in%trainingdata.sample] # test sample

set.seed(123)
cv_glmnet_cox_1<-cv.glmnet(x= as.matrix(OV_TCGA_GSE[trainingdata.sample,KMgenes_TCGA_GSE]), y= Surv(OV_TCGA_GSE[trainingdata.sample,"time"],
                           OV_TCGA_GSE[trainingdata.sample,]$vital_status=="Dead"), family= "cox", nfolds = 10,
                           alpha=1
                           )
set.seed(123)
cv_glmnet_cox_.5<-cv.glmnet(x= as.matrix(OV_TCGA_GSE[trainingdata.sample,KMgenes_TCGA_GSE]), y= Surv(OV_TCGA_GSE[trainingdata.sample,"time"],
                                         OV_TCGA_GSE[trainingdata.sample,]$vital_status=="Dead"), family= "cox", nfolds = 10,
                            alpha=0.5
)

set.seed(123)
cv_glmnet_cox_0<-cv.glmnet(x= as.matrix(OV_TCGA_GSE[trainingdata.sample,KMgenes_TCGA_GSE]), y= Surv(OV_TCGA_GSE[trainingdata.sample,"time"],
                           OV_TCGA_GSE[trainingdata.sample,]$vital_status=="Dead"), family= "cox", nfolds = 10,
                           alpha=0
)


par(mfrow=c(2,2))
plot(cv_glmnet_cox_1); plot(cv_glmnet_cox_.5); plot(cv_glmnet_cox_0)

plot(log(cv_glmnet_cox_1$lambda), cv_glmnet_cox_1$cvm, pch=19, col= "red", xlab= "Log(L)")
points(log(cv_glmnet_cox_.5$lambda), cv_glmnet_cox_.5$cvm, pch=19, col= "grey", xlab= "Log(L)")
points(log(cv_glmnet_cox_0$lambda), cv_glmnet_cox_0$cvm, pch=19, col= "blue", xlab= "Log(L)")
legend("topright", legend= c("a=1", "a=0.5", "a=0"), col= c("red", "grey", "blue"), pch= 19,
       x.intersp = .75, y.intersp = .75)

##############################################
#chosing the best fit model; alpha=1; LASSO)#
#############################################
cv_glmnet_cox<- cv_glmnet_cox_1
par(mfrow=c(1,1))
plot(cv_glmnet_cox)

plot(cv_glmnet_cox$glmnet.fit, s= cv_glmnet_cox$lambda.min, label=T)

#saveRDS(cv_glmnet_cox, file="cv_glmnet_cox_survival.rds")
coefvalues<-coefficients(cv_glmnet_cox, s= cv_glmnet_cox$lambda.min)
coefvalues<- as.matrix(coefvalues)
coefvalues<-as.data.frame(coefvalues[coefvalues[,1]!=0,]) 
colnames(coefvalues)<- "cv.lamda.min"
rownames(coefvalues) #

##  [1] "RHOT1"   "EDNRA"   "RPS6KA2" "GFPT2"   "ASAH1"   "BTN3A3"  "WT1"    
##  [8] "NUCB1"   "LYVE1"   "PIK3R1"  "ZNF330"  "FAM8A1"  "BACE2"   "GREB1"  
## [15] "RASA1"   "SCTR"    "P2RX7"   "TMED10"  "DIAPH2"  "INPP1"

coefvalues # coefficients along with gene

##         cv.lamda.min
## RHOT1    0.178893259
## EDNRA    0.074975099
## RPS6KA2  0.145896482
## GFPT2    0.029641155
## ASAH1    0.108966294
## BTN3A3  -0.100227494
## WT1     -0.007357848
## NUCB1    0.037373759
## LYVE1    0.020082208
## PIK3R1   0.014898043
## ZNF330  -0.007485032
## FAM8A1  -0.045531972
## BACE2   -0.004701693
## GREB1   -0.016633823
## RASA1    0.104274856
## SCTR    -0.040192606
## P2RX7   -0.090735836
## TMED10  -0.218236008
## DIAPH2  -0.082572791
## INPP1   -0.059699597

#write.csv(coefvalues, file= "LASSO_coef.csv")
# Check the prediction power for both variables(obtained from cv_glmnet)
#and coxgenes_merged (obtained via cox regression analysis)
mPS.Score<-  predict(cv_glmnet_cox,s=c(0,cv_glmnet_cox$lambda.min, cv_glmnet_cox$lambda.1se),
                     newx = as.matrix(OV_TCGA_GSE[ , KMgenes_TCGA_GSE]), type= "link")
colnames(mPS.Score)<- c("lamda.all", "lamda.min", "lamda.1se")
head(mPS.Score)

##                              lamda.all  lamda.min lamda.1se
## TCGA-29-1692-01B-01R-0808-01 -18.14502 0.07776236         0
## TCGA-13-1488-01A-01R-0477-01 -15.44010 0.09313776         0
## TCGA-13-1483-01A-01R-0477-01 -14.88338 0.25075725         0
## TCGA-24-2024-01A-02R-0668-01 -17.71287 0.10741438         0
## TCGA-13-0908-01B-01R-0453-01 -16.44914 0.23766284         0
## TCGA-04-1341-01A-01R-0434-01 -16.20803 0.48304893         0

OV.Surv_Score<-cbind.data.frame(OV_TCGA_GSE[,c(1:7)],mPS.Score)
plot(mPS.Score[,1], mPS.Score[,2])

#################################################
# mPS stratification based on High vs Low score #
#################################################

Q<-quantile(OV.Surv_Score$lamda.min)
Q

##          0%         25%         50%         75%        100% 
## -0.52734240  0.08926057  0.25029223  0.41508097  1.17822658

OV.Surv_Score$lamda.min_Q<- factor(ifelse(OV.Surv_Score$lamda.min>Q[4],"Q4", 
                                                   ifelse(OV.Surv_Score$lamda.min>Q[3],"Q3",
                                                          ifelse(OV.Surv_Score$lamda.min>Q[2],"Q2",
                                                                 "Q1")))) 

OV.Surv_Score$lamda.min_QHL<- factor(ifelse(OV.Surv_Score$lamda.min>Q[3],"High", 
                                                     ifelse(OV.Surv_Score$lamda.min<Q[3],"Low",
                                                            NA)))
OV.Surv_Score$lamda.min_QHL<- relevel(OV.Surv_Score$lamda.min_QHL, ref= "Low")

par(mfrow=c(1,2))


#lamda.min_training
OV_pred<-survivalROC(Stime=OV.Surv_Score$time[rownames(OV.Surv_Score)%in%trainingdata.sample],
                     status= as.numeric(OV.Surv_Score$vital_status=="Dead")[rownames(OV.Surv_Score)%in%trainingdata.sample],
                     marker=OV.Surv_Score$lamda.min[rownames(OV.Surv_Score)%in%trainingdata.sample],
                     predict.time = 3*365, method= "KM"
)

plot(OV_pred$FP, OV_pred$TP, type="l", col= "red" ,xlim=c(0,1), ylim=c(0,1),  
     
     xlab=paste( "FP", "\n", "AUC = ",round(OV_pred$AUC,3)), 
     ylab="TP",main="mPS_lamda.all, Method = KM \n Year = 3, Sample= Trainingdata")
abline(0,1)
#lamda.min_test
OV_pred<-survivalROC(Stime=OV.Surv_Score$time[rownames(OV.Surv_Score)%in%testdata.sample],
                     status= as.numeric(OV.Surv_Score$vital_status=="Dead")[rownames(OV.Surv_Score)%in%testdata.sample],
                     marker=OV.Surv_Score$lamda.min[rownames(OV.Surv_Score)%in%testdata.sample],
                     predict.time = 3*365, method= "KM"
)

plot(OV_pred$FP, OV_pred$TP, type="l", col= "red" ,xlim=c(0,1), ylim=c(0,1),  
     
     xlab=paste( "FP", "\n", "AUC = ",round(OV_pred$AUC,3)), 
     ylab="TP",main="mPS_lamda.all, Method = KM \n Year = 3, Sample= Testdata")
abline(0,1)

# multiple line graphs
#lamda.min_Trainingdata
par(mfrow= c(1,2))
my.colors<- c("black", "violet", "blue", "green", "red")
set.seed(123)

OV_pred<-list(
  "1"="",
  "2"="",
  "3"="",
  "4"="",
  "5"=""
)
for (i in 1:5) {
  OV_pred[[i]]<-survivalROC(Stime=OV.Surv_Score$time[rownames(OV.Surv_Score)%in%trainingdata.sample],
                            status= as.numeric(OV.Surv_Score$vital_status=="Dead")[rownames(OV.Surv_Score)%in%trainingdata.sample],
                            marker=OV.Surv_Score$lamda.min[rownames(OV.Surv_Score)%in%trainingdata.sample],
                          predict.time = i*365, method= "KM"
  )}


plot(OV_pred[[1]]$FP, OV_pred[[1]]$TP, type="l", col= "black" ,xlim=c(0,1), ylim=c(0,1),  
     
     xlab=paste( "FP"), 
     ylab="TP",main="TrainingData")
abline(0,1)

for (i in 2:5) {lines(OV_pred[[i]]$FP, OV_pred[[i]]$TP, type="l", col= my.colors[i] ,xlim=c(0,1), ylim=c(0,1))
}

txt<- c(round(OV_pred[[1]]$AUC,2),round(OV_pred[[2]]$AUC,2),round(OV_pred[[3]]$AUC,2),round(OV_pred[[4]]$AUC,2),
        round(OV_pred[[5]]$AUC,2)) 

legend("bottomright", legend= c(1:5, txt), col= my.colors, lty=1, ncol=2, cex=0.75, title=paste0("Year", "      AUC"),
       y.intersp=0.75,x.intersp = 0)

#for testdata
my.colors<- c("black", "violet", "blue", "green", "red")
set.seed(123)

OV_pred<-list(
  "1"="",
  "2"="",
  "3"="",
  "4"="",
  "5"=""
)
for (i in 1:5) {
  OV_pred[[i]]<-survivalROC(Stime=OV.Surv_Score$time[rownames(OV.Surv_Score)%in%testdata.sample],
                            status= as.numeric(OV.Surv_Score$vital_status=="Dead")[rownames(OV.Surv_Score)%in%testdata.sample],
                            marker=OV.Surv_Score$lamda.min[rownames(OV.Surv_Score)%in%testdata.sample],
                            predict.time = i*365, method= "KM"
  )}


plot(OV_pred[[1]]$FP, OV_pred[[1]]$TP, type="l", col= "black" ,xlim=c(0,1), ylim=c(0,1),  
     
     xlab=paste( "FP"), 
     ylab="TP",main="TestData")
abline(0,1)

for (i in 2:5) {lines(OV_pred[[i]]$FP, OV_pred[[i]]$TP, type="l", col= my.colors[i] ,xlim=c(0,1), ylim=c(0,1))
}

txt<- c(round(OV_pred[[1]]$AUC,2),round(OV_pred[[2]]$AUC,2),round(OV_pred[[3]]$AUC,2),round(OV_pred[[4]]$AUC,2),
        round(OV_pred[[5]]$AUC,2)) 

legend("bottomright", legend= c(1:5, txt), col= my.colors, ncol=2,lty=1, cex=0.75, title=paste0("Year", "      AUC"),
       y.intersp=0.75,x.intersp = 0)

##########################
#Survival Plot#
########################
#only for trainingdata
OV.Surv_Score_Training<- OV.Surv_Score[rownames(OV.Surv_Score)%in%trainingdata.sample,]

Q<-quantile(OV.Surv_Score_Training$lamda.min)
Q

##          0%         25%         50%         75%        100% 
## -0.52734240  0.08691849  0.24052934  0.41042759  1.17822658

OV.Surv_Score_Training$lamda.min_Q<- factor(ifelse(OV.Surv_Score_Training$lamda.min>Q[4],"Q4", 
                                               ifelse(OV.Surv_Score_Training$lamda.min>Q[3],"Q3",
                                                      ifelse(OV.Surv_Score_Training$lamda.min>Q[2],"Q2",
                                                             "Q1")))) 


OV.Surv_Score_Training$lamda.min_QHL<- factor(ifelse(OV.Surv_Score_Training$lamda.min>Q[3],"High", 
                                                 ifelse(OV.Surv_Score_Training$lamda.min<Q[3],"Low",
                                                        NA)))


fit_surv<- surv_fit(Surv(OV.Surv_Score_Training$time, OV.Surv_Score_Training$vital_status=="Dead")~ lamda.min_QHL,
                    data=OV.Surv_Score_Training)


fig4a<-ggsurvplot(fit_surv, conf.int=FALSE, pval=TRUE, risk.table=TRUE,
           palette = c("red", "blue"),
           legend.title="mPSHvsL",
           legend.labs= levels(OV.Surv_Score_Training$lamda.min_QHL),
           test.for.trend = F,
           surv.median.line = "hv",
           title="KM_TrainingData" 
)

# Four grps Q1>25% cut off; Q2=25-50%, Q3=50-75%, Q4<75%
fit_surv<- surv_fit(Surv(OV.Surv_Score_Training$time, OV.Surv_Score_Training$vital_status=="Dead")~ lamda.min_Q,
                    data=OV.Surv_Score_Training)

fig4b<-ggsurvplot(fit_surv, conf.int=FALSE, pval=TRUE, risk.table=TRUE,
           palette = c("black", "blue", "green" ,"red"),
           legend.title="mPSQ",
           legend.labs= levels(OV.Surv_Score_Training$lamda.min_Q),
           test.for.trend = F,
           surv.median.line = "hv",
           title="KM_TrainingData" 
)




#for testdata

OV.Surv_Score_Test<- OV.Surv_Score[rownames(OV_TCGA_GSE)%in%testdata.sample,]

Q<-quantile(OV.Surv_Score_Test$lamda.min)
Q

##          0%         25%         50%         75%        100% 
## -0.52426506  0.09061088  0.25978471  0.41631065  0.95928724

OV.Surv_Score_Test$lamda.min_Q<- factor(ifelse(OV.Surv_Score_Test$lamda.min>Q[4],"Q4", 
                                                   ifelse(OV.Surv_Score_Test$lamda.min>Q[3],"Q3",
                                                          ifelse(OV.Surv_Score_Test$lamda.min>Q[2],"Q2",
                                                                 "Q1")))) 


OV.Surv_Score_Test$lamda.min_QHL<- factor(ifelse(OV.Surv_Score_Test$lamda.min>Q[3],"High", 
                                                     ifelse(OV.Surv_Score_Test$lamda.min<Q[3],"Low",
                                                            NA)))


fit_surv<- surv_fit(Surv(OV.Surv_Score_Test$time, OV.Surv_Score_Test$vital_status=="Dead")~ lamda.min_QHL,
                    data=OV.Surv_Score_Test)


fig4c<-ggsurvplot(fit_surv, conf.int=FALSE, pval=TRUE, risk.table=TRUE,
           palette = c("red", "blue"),
           legend.title="mPSHvsL",
           legend.labs= levels(OV.Surv_Score_Test$lamda.min_QHL),
           test.for.trend = F,
           surv.median.line = "hv",
           title="KM_TestData" 
)

# Four grps Q1>25% cut off; Q2=25-50%, Q3=50-75%, Q4<75%
fit_surv<- surv_fit(Surv(OV.Surv_Score_Test$time, OV.Surv_Score_Test$vital_status=="Dead")~ lamda.min_Q,
                    data=OV.Surv_Score_Test)

fig4d<-ggsurvplot(fit_surv, conf.int=FALSE, pval=TRUE, risk.table=TRUE,
           palette = c("black", "blue", "green" ,"red"),
           legend.title="mPSQ",
           legend.labs= levels(OV.Surv_Score_Test$lamda.min_Q),
           test.for.trend = F,
           surv.median.line = "hv",
           title="KM_TestData"  
)
fig.ggplots<- ggarrange(fig4a$plot,fig4b$plot,fig4c$plot,fig4d$plot, 
                        labels= c("(A)", "(B)", "(C)", "(D)"),
                                  ncol=2, nrow=2)

fig.ggplots

#
head(OV_TCGA_GSE)[, 1:10]

##                              shortLetterCode                    description
## TCGA-29-1692-01B-01R-0808-01              TP Serous cystadenocarcinoma, NOS
## TCGA-13-1488-01A-01R-0477-01              TP Serous cystadenocarcinoma, NOS
## TCGA-13-1483-01A-01R-0477-01              TP Serous cystadenocarcinoma, NOS
## TCGA-24-2024-01A-02R-0668-01              TP Serous cystadenocarcinoma, NOS
## TCGA-13-0908-01B-01R-0453-01              TP Serous cystadenocarcinoma, NOS
## TCGA-04-1341-01A-01R-0434-01              TP Serous cystadenocarcinoma, NOS
##                              time vital_status batch correlation
## TCGA-29-1692-01B-01R-0808-01 2872         Dead  TCGA   1.0000000
## TCGA-13-1488-01A-01R-0477-01 2154         Dead  TCGA   0.9237833
## TCGA-13-1483-01A-01R-0477-01  895         Dead  TCGA   0.9287817
## TCGA-24-2024-01A-02R-0668-01 1769         Dead  TCGA   0.9244550
## TCGA-13-0908-01B-01R-0453-01 1843         Dead  TCGA   0.9168311
## TCGA-04-1341-01A-01R-0434-01   33        Alive  TCGA   0.9105627
##                              correlation_batch    GAPDH   TUBA1B   TUBA1C
## TCGA-29-1692-01B-01R-0808-01         1.0000000 12.58758 11.80367 11.75346
## TCGA-13-1488-01A-01R-0477-01         0.9141113 12.68406 11.78490 11.91617
## TCGA-13-1483-01A-01R-0477-01         0.9194885 12.58758 11.16848 11.15961
## TCGA-24-2024-01A-02R-0668-01         0.9145925 12.49987 11.62928 11.55965
## TCGA-13-0908-01B-01R-0453-01         0.9069363 12.46622 11.47308 11.57144
## TCGA-04-1341-01A-01R-0434-01         0.8990117 12.75124 11.90759 12.04805

m<- match(rownames(OV_TCGA_GSE), rownames(OV.Surv_Score)) # perfect match
# perfect match
OV.Surv_Score_Training$lamda.min_QHL<- relevel(OV.Surv_Score_Training$lamda.min_QHL, ref= "Low") 
coefvalues$x<- c(1:nrow(coefvalues))
coefvalues<-coefvalues[order(coefvalues$cv.lamda.min, decreasing = T),] 
coefvalues$x<- c(1:nrow(coefvalues))
dim(OV_TCGA_GSE)

## [1]  982 1069

head(OV_TCGA_GSE)[, 7:10]

##                              correlation_batch    GAPDH   TUBA1B   TUBA1C
## TCGA-29-1692-01B-01R-0808-01         1.0000000 12.58758 11.80367 11.75346
## TCGA-13-1488-01A-01R-0477-01         0.9141113 12.68406 11.78490 11.91617
## TCGA-13-1483-01A-01R-0477-01         0.9194885 12.58758 11.16848 11.15961
## TCGA-24-2024-01A-02R-0668-01         0.9145925 12.49987 11.62928 11.55965
## TCGA-13-0908-01B-01R-0453-01         0.9069363 12.46622 11.47308 11.57144
## TCGA-04-1341-01A-01R-0434-01         0.8990117 12.75124 11.90759 12.04805

y<-OV_TCGA_GSE[,rownames(coefvalues)] # data with only geneids assocaited with lamda.min
dim(y)

## [1] 982  20

y.training<- y[rownames(OV.Surv_Score_Training),] # training samples
y.test<-y[rownames(OV.Surv_Score_Test),]

head(y.training)[, 1:5]

##                                 RHOT1  RPS6KA2    ASAH1    RASA1    EDNRA
## TCGA-13-0908-01B-01R-0453-01 6.796971 5.097088 9.301658 7.245542 6.463844
## TCGA-04-1341-01A-01R-0434-01 7.361495 5.813764 8.703696 7.489507 5.687678
## TCGA-36-1575-01A-01R-0538-01 7.005700 5.638665 8.536276 7.886416 6.001421
## TCGA-25-2408-01A-01R-0709-01 7.232662 6.724524 9.411316 8.709715 7.433551
## TCGA-25-1877-01A-01R-0653-01 7.571181 4.747518 9.620693 8.018625 7.404528
## TCGA-61-2094-01A-01R-0668-01 6.471896 5.779449 9.734551 7.891222 5.576870

match(rownames(y.training), rownames(OV.Surv_Score_Training))

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## [307] 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
## [325] 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## [343] 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## [361] 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## [379] 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## [397] 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
## [415] 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
## [433] 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
## [451] 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
## [469] 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
## [487] 487 488 489 490 491

match(rownames(y.test), rownames(OV.Surv_Score_Test))

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## [307] 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
## [325] 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## [343] 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## [361] 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## [379] 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## [397] 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
## [415] 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
## [433] 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
## [451] 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
## [469] 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
## [487] 487 488 489 490 491

y.training<- cbind(y.training,OV.Surv_Score_Training$lamda.min,OV.Surv_Score_Training$lamda.min_QHL, OV.Surv_Score_Training$lamda.min_Q)
y.test<- cbind(y.test,OV.Surv_Score_Test$lamda.min,OV.Surv_Score_Test$lamda.min_QHL, OV.Surv_Score_Test$lamda.min_Q)

ncol(y.training)

## [1] 23

ncol(y.test)

## [1] 23

x<- ncol(y.training)-3
x.test<- ncol(y.test)-3
colnames(y.training)[-c(1:x)] <-c("mPS.lamda.min", "mPS_QHL", "mPS_Q")
colnames(y.test)[-c(1:x.test)] <-c("mPS.lamda.min", "mPS_QHL", "mPS_Q")

y.training<- y.training[order(y.training[,"mPS.lamda.min"], decreasing = T),]
y.training[,"mPS.lamda.min"]

##   [1]  1.178226583  0.974262535  0.962140793  0.955170687  0.953000659
##   [6]  0.866465544  0.863709849  0.821080885  0.805024397  0.803373978
##  [11]  0.800112033  0.800088426  0.789026058  0.784576709  0.758187269
##  [16]  0.748624185  0.748519927  0.744531149  0.741834435  0.730623952
##  [21]  0.730499363  0.728757392  0.726902549  0.715452334  0.693374710
##  [26]  0.667811918  0.664971602  0.660187858  0.655550204  0.647482533
##  [31]  0.645173201  0.644543525  0.642283038  0.639969327  0.628516385
##  [36]  0.628186940  0.626513140  0.620127594  0.612924908  0.611782867
##  [41]  0.610529261  0.608295922  0.607204855  0.606368571  0.604484751
##  [46]  0.602210886  0.599343346  0.598985385  0.592652431  0.592547535
##  [51]  0.591890673  0.589011902  0.588274185  0.588172895  0.577380584
##  [56]  0.576667979  0.574786299  0.571448976  0.569190983  0.568764405
##  [61]  0.564356512  0.559335279  0.553904193  0.552063966  0.550265784
##  [66]  0.548457432  0.543017247  0.540169202  0.537935256  0.537385148
##  [71]  0.536123175  0.533628224  0.531774213  0.531450840  0.529569800
##  [76]  0.529337240  0.522288772  0.518587072  0.516621185  0.513832654
##  [81]  0.511948392  0.509198650  0.505018273  0.502826699  0.500525956
##  [86]  0.499181109  0.498793585  0.496303812  0.495943735  0.483048930
##  [91]  0.482611228  0.482409132  0.480304751  0.479371858  0.475674823
##  [96]  0.472410063  0.472304848  0.471517091  0.470170636  0.469461330
## [101]  0.467639208  0.463559993  0.463405659  0.463344033  0.458188752
## [106]  0.458072667  0.457588742  0.457009422  0.456048125  0.453985667
## [111]  0.451576483  0.448455298  0.442972720  0.441835899  0.440091831
## [116]  0.439005925  0.437317295  0.435201122  0.426670657  0.417571659
## [121]  0.415845807  0.415399987  0.411218524  0.409636658  0.409499608
## [126]  0.408705732  0.406851252  0.406848903  0.405930788  0.403021181
## [131]  0.401202002  0.399245412  0.398993505  0.398293540  0.395530052
## [136]  0.395387062  0.395082937  0.393464349  0.391445041  0.390268913
## [141]  0.388221147  0.387946096  0.386253066  0.386007403  0.383649060
## [146]  0.383273170  0.382238620  0.382031915  0.381881341  0.380752066
## [151]  0.378259085  0.376018563  0.375252590  0.373905888  0.373616949
## [156]  0.371584721  0.369207243  0.367999656  0.367686517  0.364992071
## [161]  0.363899064  0.359723236  0.356370548  0.355412048  0.353814279
## [166]  0.353037069  0.352403073  0.351988080  0.351832404  0.350682820
## [171]  0.348015104  0.347132420  0.346856820  0.346533335  0.345901278
## [176]  0.345843233  0.344661352  0.343763518  0.341060033  0.340384392
## [181]  0.339667087  0.339447488  0.339389826  0.337757971  0.336642403
## [186]  0.335240006  0.334122064  0.332480290  0.330589717  0.328566728
## [191]  0.326792914  0.324362328  0.323556181  0.323482032  0.323317559
## [196]  0.320315694  0.319157319  0.317308057  0.317242415  0.315961777
## [201]  0.315140091  0.313458738  0.311240876  0.310927106  0.310606472
## [206]  0.309955122  0.307994488  0.303763976  0.303236007  0.302160334
## [211]  0.301404312  0.301180562  0.296130290  0.294187329  0.293971525
## [216]  0.292812310  0.290196738  0.288108875  0.285445253  0.282431453
## [221]  0.280722139  0.280198937  0.279150139  0.277844930  0.276199887
## [226]  0.272265624  0.271483391  0.270289234  0.270239266  0.270047417
## [231]  0.266873531  0.266727044  0.266357798  0.263899922  0.255279700
## [236]  0.251904765  0.249827204  0.247839807  0.247755055  0.244709827
## [241]  0.244611438  0.244488842  0.241983188  0.241727348  0.240698054
## [246]  0.240529342  0.240436495  0.240010516  0.238027550  0.237662844
## [251]  0.233907711  0.233760180  0.233419725  0.231350983  0.231185312
## [256]  0.231107140  0.231040162  0.229706958  0.229421152  0.229008468
## [261]  0.228041057  0.223347353  0.223320773  0.221015702  0.219813039
## [266]  0.218079725  0.217013383  0.215377691  0.211498359  0.209217846
## [271]  0.208437105  0.206982074  0.203386403  0.201433594  0.199451787
## [276]  0.197953850  0.197801353  0.197476368  0.197238009  0.195603591
## [281]  0.194379815  0.193261599  0.192949135  0.192935577  0.191938221
## [286]  0.191792955  0.189844710  0.189317109  0.188891175  0.187058399
## [291]  0.185966627  0.185644972  0.185327269  0.185272521  0.184832241
## [296]  0.183411312  0.180018356  0.179519503  0.178926352  0.176595771
## [301]  0.175849976  0.174725748  0.173475293  0.173438482  0.169021795
## [306]  0.164329913  0.163684091  0.163558398  0.160282530  0.159025995
## [311]  0.159021583  0.155931517  0.155086122  0.153093232  0.151937894
## [316]  0.151857716  0.151691516  0.151254473  0.148056107  0.147742454
## [321]  0.147598485  0.146413571  0.145658006  0.143509180  0.142767254
## [326]  0.140891653  0.140488776  0.138385516  0.138054599  0.137212836
## [331]  0.137136333  0.136483152  0.131889688  0.126459930  0.126315187
## [336]  0.125997595  0.124911240  0.122942039  0.122385389  0.121456478
## [341]  0.120888028  0.119214951  0.118896849  0.118353549  0.116936758
## [346]  0.116352925  0.114878429  0.114870581  0.113298971  0.111783899
## [351]  0.110823593  0.110083710  0.109592618  0.109169290  0.108884328
## [356]  0.108463099  0.107433411  0.107157816  0.107042277  0.105247477
## [361]  0.104436716  0.103928756  0.100059566  0.098479913  0.097325066
## [366]  0.094428593  0.091233986  0.087251463  0.086585507  0.083593108
## [371]  0.083101521  0.082854107  0.081669120  0.081412905  0.081380553
## [376]  0.079773645  0.077769330  0.077187840  0.076474499  0.070321590
## [381]  0.069054405  0.065625084  0.065539606  0.065403119  0.064968257
## [386]  0.059795807  0.059341733  0.056874860  0.056111969  0.055719326
## [391]  0.055411024  0.054283134  0.054263065  0.049330734  0.046535241
## [396]  0.045090791  0.044845784  0.044640074  0.036623192  0.033688004
## [401]  0.031645962  0.031067564  0.029514896  0.028954345  0.028120570
## [406]  0.027572887  0.025070533  0.022120724  0.020756312  0.015821488
## [411]  0.013446937  0.012468640  0.011813756  0.010964549  0.005553016
## [416]  0.004774123  0.003141584 -0.003982315 -0.004752782 -0.006512224
## [421] -0.006910454 -0.007534220 -0.010632302 -0.011939872 -0.012369411
## [426] -0.012440809 -0.012497556 -0.018626493 -0.034189099 -0.034266728
## [431] -0.034827224 -0.036745758 -0.038611040 -0.039123591 -0.041308484
## [436] -0.043412188 -0.048015597 -0.050186218 -0.051180312 -0.056920154
## [441] -0.057084644 -0.057245774 -0.059433005 -0.060058924 -0.063671503
## [446] -0.064778594 -0.066480355 -0.072314502 -0.073646768 -0.074490570
## [451] -0.074551019 -0.074935363 -0.078639997 -0.078987439 -0.080637636
## [456] -0.083505510 -0.084777361 -0.097351924 -0.097840272 -0.104092846
## [461] -0.104920752 -0.111054264 -0.111388281 -0.112119026 -0.119474009
## [466] -0.138684143 -0.139462830 -0.143611248 -0.149387518 -0.165823468
## [471] -0.169815610 -0.170311719 -0.171695962 -0.189538684 -0.196173560
## [476] -0.201444724 -0.207169429 -0.207732707 -0.222523929 -0.226585571
## [481] -0.227571057 -0.243313449 -0.256198081 -0.258556973 -0.270734427
## [486] -0.294921384 -0.312724842 -0.327774703 -0.392414221 -0.447158136
## [491] -0.527342399

y.test<- y.test[order(y.test[,"mPS.lamda.min"], decreasing = T),]

library(pheatmap)
library(RColorBrewer)
colfunc<-colorRampPalette(c("springgreen","yellow","red"))

annotation_col = data.frame(
  mPS=as.numeric(y.training$mPS.lamda.min),
  mPS_Q=factor(y.training$mPS_Q),
  mPS_QHL=factor(y.training$mPS_QHL)
)
annotation_col$mPS

##   [1]  1.178226583  0.974262535  0.962140793  0.955170687  0.953000659
##   [6]  0.866465544  0.863709849  0.821080885  0.805024397  0.803373978
##  [11]  0.800112033  0.800088426  0.789026058  0.784576709  0.758187269
##  [16]  0.748624185  0.748519927  0.744531149  0.741834435  0.730623952
##  [21]  0.730499363  0.728757392  0.726902549  0.715452334  0.693374710
##  [26]  0.667811918  0.664971602  0.660187858  0.655550204  0.647482533
##  [31]  0.645173201  0.644543525  0.642283038  0.639969327  0.628516385
##  [36]  0.628186940  0.626513140  0.620127594  0.612924908  0.611782867
##  [41]  0.610529261  0.608295922  0.607204855  0.606368571  0.604484751
##  [46]  0.602210886  0.599343346  0.598985385  0.592652431  0.592547535
##  [51]  0.591890673  0.589011902  0.588274185  0.588172895  0.577380584
##  [56]  0.576667979  0.574786299  0.571448976  0.569190983  0.568764405
##  [61]  0.564356512  0.559335279  0.553904193  0.552063966  0.550265784
##  [66]  0.548457432  0.543017247  0.540169202  0.537935256  0.537385148
##  [71]  0.536123175  0.533628224  0.531774213  0.531450840  0.529569800
##  [76]  0.529337240  0.522288772  0.518587072  0.516621185  0.513832654
##  [81]  0.511948392  0.509198650  0.505018273  0.502826699  0.500525956
##  [86]  0.499181109  0.498793585  0.496303812  0.495943735  0.483048930
##  [91]  0.482611228  0.482409132  0.480304751  0.479371858  0.475674823
##  [96]  0.472410063  0.472304848  0.471517091  0.470170636  0.469461330
## [101]  0.467639208  0.463559993  0.463405659  0.463344033  0.458188752
## [106]  0.458072667  0.457588742  0.457009422  0.456048125  0.453985667
## [111]  0.451576483  0.448455298  0.442972720  0.441835899  0.440091831
## [116]  0.439005925  0.437317295  0.435201122  0.426670657  0.417571659
## [121]  0.415845807  0.415399987  0.411218524  0.409636658  0.409499608
## [126]  0.408705732  0.406851252  0.406848903  0.405930788  0.403021181
## [131]  0.401202002  0.399245412  0.398993505  0.398293540  0.395530052
## [136]  0.395387062  0.395082937  0.393464349  0.391445041  0.390268913
## [141]  0.388221147  0.387946096  0.386253066  0.386007403  0.383649060
## [146]  0.383273170  0.382238620  0.382031915  0.381881341  0.380752066
## [151]  0.378259085  0.376018563  0.375252590  0.373905888  0.373616949
## [156]  0.371584721  0.369207243  0.367999656  0.367686517  0.364992071
## [161]  0.363899064  0.359723236  0.356370548  0.355412048  0.353814279
## [166]  0.353037069  0.352403073  0.351988080  0.351832404  0.350682820
## [171]  0.348015104  0.347132420  0.346856820  0.346533335  0.345901278
## [176]  0.345843233  0.344661352  0.343763518  0.341060033  0.340384392
## [181]  0.339667087  0.339447488  0.339389826  0.337757971  0.336642403
## [186]  0.335240006  0.334122064  0.332480290  0.330589717  0.328566728
## [191]  0.326792914  0.324362328  0.323556181  0.323482032  0.323317559
## [196]  0.320315694  0.319157319  0.317308057  0.317242415  0.315961777
## [201]  0.315140091  0.313458738  0.311240876  0.310927106  0.310606472
## [206]  0.309955122  0.307994488  0.303763976  0.303236007  0.302160334
## [211]  0.301404312  0.301180562  0.296130290  0.294187329  0.293971525
## [216]  0.292812310  0.290196738  0.288108875  0.285445253  0.282431453
## [221]  0.280722139  0.280198937  0.279150139  0.277844930  0.276199887
## [226]  0.272265624  0.271483391  0.270289234  0.270239266  0.270047417
## [231]  0.266873531  0.266727044  0.266357798  0.263899922  0.255279700
## [236]  0.251904765  0.249827204  0.247839807  0.247755055  0.244709827
## [241]  0.244611438  0.244488842  0.241983188  0.241727348  0.240698054
## [246]  0.240529342  0.240436495  0.240010516  0.238027550  0.237662844
## [251]  0.233907711  0.233760180  0.233419725  0.231350983  0.231185312
## [256]  0.231107140  0.231040162  0.229706958  0.229421152  0.229008468
## [261]  0.228041057  0.223347353  0.223320773  0.221015702  0.219813039
## [266]  0.218079725  0.217013383  0.215377691  0.211498359  0.209217846
## [271]  0.208437105  0.206982074  0.203386403  0.201433594  0.199451787
## [276]  0.197953850  0.197801353  0.197476368  0.197238009  0.195603591
## [281]  0.194379815  0.193261599  0.192949135  0.192935577  0.191938221
## [286]  0.191792955  0.189844710  0.189317109  0.188891175  0.187058399
## [291]  0.185966627  0.185644972  0.185327269  0.185272521  0.184832241
## [296]  0.183411312  0.180018356  0.179519503  0.178926352  0.176595771
## [301]  0.175849976  0.174725748  0.173475293  0.173438482  0.169021795
## [306]  0.164329913  0.163684091  0.163558398  0.160282530  0.159025995
## [311]  0.159021583  0.155931517  0.155086122  0.153093232  0.151937894
## [316]  0.151857716  0.151691516  0.151254473  0.148056107  0.147742454
## [321]  0.147598485  0.146413571  0.145658006  0.143509180  0.142767254
## [326]  0.140891653  0.140488776  0.138385516  0.138054599  0.137212836
## [331]  0.137136333  0.136483152  0.131889688  0.126459930  0.126315187
## [336]  0.125997595  0.124911240  0.122942039  0.122385389  0.121456478
## [341]  0.120888028  0.119214951  0.118896849  0.118353549  0.116936758
## [346]  0.116352925  0.114878429  0.114870581  0.113298971  0.111783899
## [351]  0.110823593  0.110083710  0.109592618  0.109169290  0.108884328
## [356]  0.108463099  0.107433411  0.107157816  0.107042277  0.105247477
## [361]  0.104436716  0.103928756  0.100059566  0.098479913  0.097325066
## [366]  0.094428593  0.091233986  0.087251463  0.086585507  0.083593108
## [371]  0.083101521  0.082854107  0.081669120  0.081412905  0.081380553
## [376]  0.079773645  0.077769330  0.077187840  0.076474499  0.070321590
## [381]  0.069054405  0.065625084  0.065539606  0.065403119  0.064968257
## [386]  0.059795807  0.059341733  0.056874860  0.056111969  0.055719326
## [391]  0.055411024  0.054283134  0.054263065  0.049330734  0.046535241
## [396]  0.045090791  0.044845784  0.044640074  0.036623192  0.033688004
## [401]  0.031645962  0.031067564  0.029514896  0.028954345  0.028120570
## [406]  0.027572887  0.025070533  0.022120724  0.020756312  0.015821488
## [411]  0.013446937  0.012468640  0.011813756  0.010964549  0.005553016
## [416]  0.004774123  0.003141584 -0.003982315 -0.004752782 -0.006512224
## [421] -0.006910454 -0.007534220 -0.010632302 -0.011939872 -0.012369411
## [426] -0.012440809 -0.012497556 -0.018626493 -0.034189099 -0.034266728
## [431] -0.034827224 -0.036745758 -0.038611040 -0.039123591 -0.041308484
## [436] -0.043412188 -0.048015597 -0.050186218 -0.051180312 -0.056920154
## [441] -0.057084644 -0.057245774 -0.059433005 -0.060058924 -0.063671503
## [446] -0.064778594 -0.066480355 -0.072314502 -0.073646768 -0.074490570
## [451] -0.074551019 -0.074935363 -0.078639997 -0.078987439 -0.080637636
## [456] -0.083505510 -0.084777361 -0.097351924 -0.097840272 -0.104092846
## [461] -0.104920752 -0.111054264 -0.111388281 -0.112119026 -0.119474009
## [466] -0.138684143 -0.139462830 -0.143611248 -0.149387518 -0.165823468
## [471] -0.169815610 -0.170311719 -0.171695962 -0.189538684 -0.196173560
## [476] -0.201444724 -0.207169429 -0.207732707 -0.222523929 -0.226585571
## [481] -0.227571057 -0.243313449 -0.256198081 -0.258556973 -0.270734427
## [486] -0.294921384 -0.312724842 -0.327774703 -0.392414221 -0.447158136
## [491] -0.527342399

rownames(annotation_col)<- rownames(y.training)

annotation_col.test = data.frame(
  mPS=as.numeric(y.test$mPS.lamda.min),
  mPS_Q=factor(y.test$mPS_Q),
  mPS_QHL=factor(y.test$mPS_QHL)
)
rownames(annotation_col.test)<- rownames(y.test)

# Specify colors
ann_colors = list(
  mPS_QHL= c("Low"="black", "High"="red"),
  mPS_Q= c("Q1"="black","Q2"="yellow", "Q3"="orange", "Q4"= "red")
)

dim(y.training)

## [1] 491  23

pheatmap(t(y.training[, 1:x]),
         color=colfunc(ncol(t(y.training[, 1:x]))),
         scale= "row", clustering_method = "ward.D2",
         cluster_cols = F,cluster_rows = F,
         annotation_col = annotation_col, annotation_colors = ann_colors,
         show_colnames = F,
         main= "TrainingData")

y.scale<- scale(y.training[,1:x], center = T, scale = T)
y.scale[1:10, 1:5]

##                                                  RHOT1    RPS6KA2      ASAH1
## GSM657645_SO404                              1.7212318  3.4002202  2.5028185
## GSM657639_SO395                              3.7762678  1.3545430  1.6207530
## GSM657699_SO77                               2.3430109  2.6732905  1.3968130
## TCGA-61-1721-01A-01R-0653-01                 1.1380233  2.3803221  1.3166308
## TCGA-13-1485-01A-02R-0477-01                 0.8882425  1.9549501  1.2587135
## GSM657575_SO21                               1.5026774  2.2211549  0.4168726
## GSM461399.CEL.gz                             0.4572770  0.7980393  2.4185301
## GSM643035_OC01_ARN0161_s1h1s1_U133p2.CEL.gz -0.4789603  0.5555957  0.1802790
## GSM461389.CEL.gz                             2.7899957 -0.4192973  0.4007161
## TCGA-24-1474-01A-01R-0502-01                 2.8546261  0.1062374 -0.1119901
##                                                  RASA1     EDNRA
## GSM657645_SO404                              2.0492443 1.5947094
## GSM657639_SO395                              2.2067718 3.1378704
## GSM657699_SO77                               2.0407125 4.5438695
## TCGA-61-1721-01A-01R-0653-01                 0.8613005 3.1263114
## TCGA-13-1485-01A-02R-0477-01                 1.2947314 1.5910556
## GSM657575_SO21                               1.8990795 1.2725529
## GSM461399.CEL.gz                             0.9325420 2.8066082
## GSM643035_OC01_ARN0161_s1h1s1_U133p2.CEL.gz -0.1843939 2.1022245
## GSM461389.CEL.gz                            -1.9627141 2.2035564
## TCGA-24-1474-01A-01R-0502-01                 1.6215367 0.6559337

y.scale_1<- ifelse(y.scale>=2,2, 
                 ifelse(y.scale<= -2, -2, y.scale))

pheatmap(t(y.scale_1),
         color=colfunc(ncol(t(y.scale_1))),
         scale= "none", clustering_method = "ward.D2",
         cluster_cols = F,cluster_rows = F,
         annotation_col = annotation_col, annotation_colors = ann_colors,
         show_colnames = F,
         main="TrainingData"
         )

y.scale.test<- scale(y.test[,1:x.test], center = T, scale = T)

y.scale_1.test<- ifelse(y.scale.test>=2,2, 
                   ifelse(y.scale.test<= -2, -2, y.scale.test))

pheatmap(t(y.scale_1.test),
         color=colfunc(ncol(t(y.scale_1.test))),
         scale= "none", clustering_method = "ward.D2",
         cluster_cols = F,cluster_rows = F,
         annotation_col = annotation_col.test, annotation_colors = ann_colors,
         show_colnames = F,
         main="TestData"
)

####################################
#MDS plot with 20 predictor genes#
###################################
par(mfrow= c(1,2))
set.seed(123)
plotMDS(t(y.training[, 1:x]), col= colfunc(4)[y.training$mPS_Q], pch=19, main= "MDS_TrainingData")
legend("topright", legend= levels(y.training$mPS_Q), col=colfunc(4), pch=19 )

set.seed(123)
#testdata
plotMDS(t(y.test[, 1:x.test]), col= colfunc(4)[y.test$mPS_Q], pch=19, main= "MDS_TestData")
legend("topright", legend= levels(y.test$mPS_Q), col=colfunc(4), pch=19 )

#############################################
#Clinical data association with mPS score#
############################################

library(TCGAbiolinks)

## Warning: package 'TCGAbiolinks' was built under R version 4.1.1

query <- GDCquery(project = "TCGA-OV", 
                  data.category = "Clinical",
                  data.type = "Clinical Supplement", 
                  data.format = "BCR Biotab")

## --------------------------------------

## o GDCquery: Searching in GDC database

## --------------------------------------

## Genome of reference: hg38

## --------------------------------------------

## oo Accessing GDC. This might take a while...

## --------------------------------------------

## ooo Project: TCGA-OV

## --------------------

## oo Filtering results

## --------------------

## ooo By data.format

## ooo By data.type

## ----------------

## oo Checking data

## ----------------

## ooo Check if there are duplicated cases

## Warning: There are more than one file for the same case. Please verify query results. You can use the command View(getResults(query)) in rstudio

## ooo Check if there results for the query

## -------------------

## o Preparing output

## -------------------

GDCdownload(query)

## Downloading data for project TCGA-OV

## Of the 7 files for download 7 already exist.

## All samples have been already downloaded

clinical.BCRtab.all <- GDCprepare(query)

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |======================================================================| 100%

tibble::tibble(sort(colnames(clinical.BCRtab.all$clinical_patient_ov)))

## # A tibble: 57 × 1
##    `sort(colnames(clinical.BCRtab.all$clinical_patient_ov))`
##    <chr>                                                    
##  1 age_at_initial_pathologic_diagnosis                      
##  2 anatomic_neoplasm_subdivision                            
##  3 bcr_patient_barcode                                      
##  4 bcr_patient_uuid                                         
##  5 birth_days_to                                            
##  6 clinical_M                                               
##  7 clinical_N                                               
##  8 clinical_stage                                           
##  9 clinical_T                                               
## 10 days_to_initial_pathologic_diagnosis                     
## # … with 47 more rows

yy<- clinical.BCRtab.all$clinical_patient_ov


m<- match(substr(rownames(OV.Surv_Score), 1,12),  yy$bcr_patient_barcode)
OV.Surv_Score$age_at_diagnosis <- as.numeric(yy$age_at_initial_pathologic_diagnosis[m])
OV.Surv_Score$age_at_diagnosis_HvsL<-factor(ifelse(OV.Surv_Score$age_at_diagnosis>=median(OV.Surv_Score$age_at_diagnosis, na.rm = T), "High", "Low"))

OV.Surv_Score$tumor_grade<-  factor(yy$tumor_grade[m])
OV.Surv_Score$tumor_grade<-  factor(yy$tumor_grade[m], levels= c("G1", "G2", "G3", "G4", "GB", "GX"))
OV.Surv_Score$tumor_grade_numeric<- as.numeric(OV.Surv_Score$tumor_grade)
OV.Surv_Score$tumor_grade_numeric<- as.numeric(factor(OV.Surv_Score$tumor_grade_numeric,levels= 1:4))
OV.Surv_Score$residual_disease_largest_nodule<-  factor(yy$residual_disease_largest_nodule[m], levels=
                                                          c("No Macroscopic disease", "1-10 mm", "11-20 mm", ">20 mm"))
OV.Surv_Score$residual_disease_largest_nodule_numeric<- as.numeric(OV.Surv_Score$residual_disease_largest_nodule)

OV.Surv_Score$ecog_score<- factor(yy$ecog_score[m])
OV.Surv_Score$ecog_score<- factor(OV.Surv_Score$ecog_score, levels= c(0:3))
OV.Surv_Score$karnofsy_score<- factor(yy$karnofsky_score[m])
OV.Surv_Score$karnofsy_score<- factor(OV.Surv_Score$karnofsy_score, levels= c(40,60,80,100))
OV.Surv_Score$clinical_stage<-  factor(yy$clinical_stage[m])
levels(OV.Surv_Score$clinical_stage)<- gsub("Stage ", "",levels(OV.Surv_Score$clinical_stage))
OV.Surv_Score$clinical_stage<- factor(OV.Surv_Score$clinical_stage, 
                                      levels= c("IA", "IB", "IC", "IIA", "IIB", "IIC", "IIIA", "IIIB", "IIIC","IV") )

OV.Surv_Score$clinical_stage_numeric<- as.numeric(OV.Surv_Score$clinical_stage)

OV.Surv_Score$lymphovascular_invasion_indicator<- factor(yy$lymphovascular_invasion_indicator[m], levels= c("NO", "YES"))
OV.Surv_Score$ethnicity<- factor(yy$ethnicity[m])
levels(OV.Surv_Score$ethnicity)<- c(NA, "HispanicORLatino", "NotHispanicORLatino")
colnames(colData(TCGA.OV))

##  [1] "barcode"                     "patient"                    
##  [3] "sample"                      "shortLetterCode"            
##  [5] "definition"                  "sample_submitter_id"        
##  [7] "sample_type_id"              "sample_id"                  
##  [9] "sample_type"                 "state"                      
## [11] "intermediate_dimension"      "pathology_report_uuid"      
## [13] "submitter_id"                "shortest_dimension"         
## [15] "longest_dimension"           "is_ffpe"                    
## [17] "tissue_type"                 "synchronous_malignancy"     
## [19] "tumor_stage"                 "days_to_diagnosis"          
## [21] "treatments"                  "last_known_disease_status"  
## [23] "tissue_or_organ_of_origin"   "days_to_last_follow_up"     
## [25] "primary_diagnosis"           "age_at_diagnosis"           
## [27] "prior_malignancy"            "year_of_diagnosis"          
## [29] "prior_treatment"             "morphology"                 
## [31] "classification_of_tumor"     "diagnosis_id"               
## [33] "icd_10_code"                 "site_of_resection_or_biopsy"
## [35] "tumor_grade"                 "progression_or_recurrence"  
## [37] "figo_stage"                  "alcohol_history"            
## [39] "exposure_id"                 "gender"                     
## [41] "ethnicity"                   "vital_status"               
## [43] "race"                        "age_at_index"               
## [45] "days_to_birth"               "year_of_birth"              
## [47] "demographic_id"              "year_of_death"              
## [49] "days_to_death"               "bcr_patient_barcode"        
## [51] "primary_site"                "disease_type"               
## [53] "project_id"                  "releasable"                 
## [55] "name"                        "released"                   
## [57] "sample.aux"                  "time"                       
## [59] "description"

m<- match(rownames(OV.Surv_Score),TCGA.OV$barcode)
OV.Surv_Score$Figo_stage<- factor(TCGA.OV$figo_stage[m])
OV.Surv_Score$age_at_diagnosis_in_days<- TCGA.OV$age_at_diagnosis[m]
OV.Surv_Score$age_at_diagnosis_in_yrs<- OV.Surv_Score$age_at_diagnosis_in_days/365

##########################################################
#Cox regression hazrd model on clinical parameters and mPS#
###########################################################

OV.Surv_Score$age_at_diagnosis_HvsL <- factor(OV.Surv_Score$age_at_diagnosis_HvsL,levels= c("Low", "High"))
covariates <- c("age_at_diagnosis_HvsL", "clinical_stage_numeric",  "tumor_grade_numeric", "ethnicity","residual_disease_largest_nodule_numeric", "lamda.min")

res.cox <- coxph(Surv(OV.Surv_Score$time, OV.Surv_Score$vital_status=="Dead") ~  clinical_stage_numeric, data =OV.Surv_Score)
summary(res.cox)

## Call:
## coxph(formula = Surv(OV.Surv_Score$time, OV.Surv_Score$vital_status == 
##     "Dead") ~ clinical_stage_numeric, data = OV.Surv_Score)
## 
##   n= 529, number of events= 333 
##    (453 observations deleted due to missingness)
## 
##                           coef exp(coef) se(coef)     z Pr(>|z|)   
## clinical_stage_numeric 0.16559   1.18008  0.05776 2.867  0.00415 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                        exp(coef) exp(-coef) lower .95 upper .95
## clinical_stage_numeric      1.18     0.8474     1.054     1.322
## 
## Concordance= 0.557  (se = 0.014 )
## Likelihood ratio test= 10.1  on 1 df,   p=0.001
## Wald test            = 8.22  on 1 df,   p=0.004
## Score (logrank) test = 8.33  on 1 df,   p=0.004

univ_formulas <- sapply(covariates,
                        function(x) as.formula(paste('Surv(OV.Surv_Score$time, OV.Surv_Score$vital_status=="Dead")~', x)))

univ_models <- lapply( univ_formulas, function(x){coxph(x, data = OV.Surv_Score)})
# Extract data 
univ_results <- lapply(univ_models,
                       function(x){ 
                         x <- summary(x)
                         p.value<-signif(x$waldtest["pvalue"], digits=2)
                         wald.test<-signif(x$waldtest["test"], digits=2)
                         beta<-signif(x$coefficients[1], digits=2);#coeficient beta
                         HR <-signif(x$coefficients[2], digits=2);#exp(beta)
                         HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
                         HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
                         HR <- paste0(HR, " (", 
                                      HR.confint.lower, "-", HR.confint.upper, ")")
                         res<-c(beta, HR, wald.test, p.value)
                         names(res)<-c("beta", "HR (95% CI for HR)", "wald.test", 
                                       "p.value")
                         return(res)
                         #return(exp(cbind(coef(x),confint(x))))
                       })

res <- t(as.data.frame(univ_results, check.names = FALSE))
as.data.frame(res) # age_at_diagnosis, clinical_stage, residual_disease, lamda.min are significant

##                                         beta HR (95% CI for HR) wald.test
## age_at_diagnosis_HvsL                   0.32      1.4 (1.1-1.7)       8.5
## clinical_stage_numeric                  0.17      1.2 (1.1-1.3)       8.2
## tumor_grade_numeric                     0.21     1.2 (0.93-1.6)       2.1
## ethnicity                               -0.1        0.9 (0.4-2)      0.06
## residual_disease_largest_nodule_numeric 0.28      1.3 (1.2-1.5)        25
## lamda.min                                1.9        7 (5.1-9.7)       140
##                                         p.value
## age_at_diagnosis_HvsL                    0.0035
## clinical_stage_numeric                   0.0041
## tumor_grade_numeric                        0.15
## ethnicity                                  0.81
## residual_disease_largest_nodule_numeric 4.5e-07
## lamda.min                               3.5e-32

#ethnicity, tumor_grade doesn't play role.
#write.csv(as.data.frame(res), file= "Univariates_clinicalparameters.csv")

#Multivariate
res.cox <- coxph(Surv(OV.Surv_Score$time, OV.Surv_Score$vital_status=="Dead") ~ age_at_diagnosis_HvsL+clinical_stage_numeric+residual_disease_largest_nodule_numeric+lamda.min , data =OV.Surv_Score)
summary(res.cox)

## Call:
## coxph(formula = Surv(OV.Surv_Score$time, OV.Surv_Score$vital_status == 
##     "Dead") ~ age_at_diagnosis_HvsL + clinical_stage_numeric + 
##     residual_disease_largest_nodule_numeric + lamda.min, data = OV.Surv_Score)
## 
##   n= 473, number of events= 303 
##    (509 observations deleted due to missingness)
## 
##                                            coef exp(coef) se(coef)     z
## age_at_diagnosis_HvsLHigh               0.20694   1.22991  0.11829 1.749
## clinical_stage_numeric                  0.09805   1.10302  0.06332 1.549
## residual_disease_largest_nodule_numeric 0.23446   1.26423  0.05875 3.991
## lamda.min                               1.81531   6.14300  0.26514 6.847
##                                         Pr(>|z|)    
## age_at_diagnosis_HvsLHigh                 0.0802 .  
## clinical_stage_numeric                    0.1215    
## residual_disease_largest_nodule_numeric 6.58e-05 ***
## lamda.min                               7.56e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                         exp(coef) exp(-coef) lower .95
## age_at_diagnosis_HvsLHigh                   1.230     0.8131    0.9754
## clinical_stage_numeric                      1.103     0.9066    0.9743
## residual_disease_largest_nodule_numeric     1.264     0.7910    1.1267
## lamda.min                                   6.143     0.1628    3.6534
##                                         upper .95
## age_at_diagnosis_HvsLHigh                   1.551
## clinical_stage_numeric                      1.249
## residual_disease_largest_nodule_numeric     1.419
## lamda.min                                  10.329
## 
## Concordance= 0.652  (se = 0.018 )
## Likelihood ratio test= 81.59  on 4 df,   p=<2e-16
## Wald test            = 77.63  on 4 df,   p=6e-16
## Score (logrank) test = 79  on 4 df,   p=3e-16

ggforest(res.cox, data= OV.Surv_Score) # showing summary of results

combined_counts<-as.matrix(OV.Surv_Score[,c("lamda.min","age_at_diagnosis", "residual_disease_largest_nodule_numeric")])
combined_counts<- combined_counts[complete.cases(combined_counts),]
head(combined_counts)[, 1:3]

##                               lamda.min age_at_diagnosis
## TCGA-29-1692-01B-01R-0808-01 0.07776236               58
## TCGA-13-1488-01A-01R-0477-01 0.09313776               59
## TCGA-13-1483-01A-01R-0477-01 0.25075725               61
## TCGA-24-2024-01A-02R-0668-01 0.10741438               72
## TCGA-13-0908-01B-01R-0453-01 0.23766284               58
## TCGA-36-1575-01A-01R-0538-01 0.50501827               83
##                              residual_disease_largest_nodule_numeric
## TCGA-29-1692-01B-01R-0808-01                                       3
## TCGA-13-1488-01A-01R-0477-01                                       3
## TCGA-13-1483-01A-01R-0477-01                                       4
## TCGA-24-2024-01A-02R-0668-01                                       2
## TCGA-13-0908-01B-01R-0453-01                                       2
## TCGA-36-1575-01A-01R-0538-01                                       1

combined_surv<- OV.Surv_Score[rownames(combined_counts),]
cv_glmnet_cox_Combined<-cv.glmnet(x= combined_counts, y= Surv(combined_surv$time, combined_surv$vital_status=="Dead"), family= "cox", nfolds =100 )
plot(cv_glmnet_cox_Combined)
plot(cv_glmnet_cox_Combined$glmnet.fit, s= cv_glmnet_cox_Combined$lambda.min)

mPS.Score_Combined<-  predict(cv_glmnet_cox_Combined,s=c(0,cv_glmnet_cox_Combined$lambda.min, cv_glmnet_cox_Combined$lambda.1se),
                     newx = combined_counts, type= "link")
colnames(mPS.Score_Combined)<- c("lamda.all", "lamda.min", "lamda.1se")
mPS.Score_Combined<- mPS.Score_Combined[, 2:3]
colnames(mPS.Score_Combined)<- c("mPS+age+RD", "mPS+age+RD.1se")

coefvalues<-coefficients(cv_glmnet_cox_Combined, s= cv_glmnet_cox_Combined$lambda.min)
coefvalues<- as.matrix(coefvalues)
coefvalues<-as.data.frame(coefvalues[coefvalues[,1]!=0,]) 
colnames(coefvalues)<- "mPS+age+RD"
#write.csv(coefvalues, file= "LASSO_coef.csv")
m<-match(rownames(combined_surv),rownames(mPS.Score_Combined)) #perfect match
m

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## [307] 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
## [325] 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## [343] 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## [361] 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## [379] 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## [397] 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
## [415] 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
## [433] 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
## [451] 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
## [469] 469 470 471 472 473 474 475

combined_surv<- cbind.data.frame(combined_surv,mPS.Score_Combined)
#lamda.min_TCGA
OV_pred<-survivalROC(Stime=combined_surv$time,
                     status= as.numeric(combined_surv$vital_status=="Dead"),
                     marker=combined_surv$lamda.min,
                     predict.time = 5*365, method= "KM"
)
OV_pred_combined<- list(
  "mPS"=OV_pred
)
# adding age
OV_pred<-survivalROC(Stime=combined_surv$time,
                     status= as.numeric(combined_surv$vital_status=="Dead"),
                     marker=combined_surv$age_at_diagnosis,
                     predict.time = 5*365, method= "KM"
)
OV_pred_combined<- list(
  "mPS"=OV_pred_combined$mPS,
  age= OV_pred
)

# Adding residual disesase(RD)

OV_pred<-survivalROC(Stime=combined_surv$time,
                     status= as.numeric(combined_surv$vital_status=="Dead"),
                     marker=combined_surv$residual_disease_largest_nodule_numeric,
                     predict.time = 5*365, method= "KM"
)

OV_pred_combined<- list(
  mPS=OV_pred_combined$mPS,
  age=OV_pred_combined$age,
  Residual= OV_pred
)

# Adding all parameters combined
OV_pred<-survivalROC(Stime=combined_surv$time,
                     status= as.numeric(combined_surv$vital_status=="Dead"),
                     marker=combined_surv$`mPS+age+RD`,
                     predict.time = 5*365, method= "KM"
)

OV_pred_combined<- list(
  mPS=OV_pred_combined$mPS,
  age=OV_pred_combined$age,
  RD=OV_pred_combined$Residual,
  "mPS+age+RD"= OV_pred
)

par(mfrow=c(1,1))
plot(OV_pred_combined[[1]]$FP, OV_pred_combined[[1]]$TP, type="l", col= "black" ,xlim=c(0,1), ylim=c(0,1),  
     
     xlab=paste("FP"), 
     ylab="TP",main="AUC, Method = KM \n Year =5, Sample= TCGA")
abline(0,1)

my.colors<- c("black", "blue", "green", "red")

for (i in 2:4) {lines(OV_pred_combined[[i]]$FP, OV_pred_combined[[i]]$TP, type="l", col= my.colors[i] ,xlim=c(0,1), ylim=c(0,1))
}

txt<- c(round(OV_pred_combined[[1]]$AUC,2),round(OV_pred_combined[[2]]$AUC,2),round(OV_pred_combined[[3]]$AUC,2),round(OV_pred_combined[[4]]$AUC,2)
        ) 

legend("bottomright", legend= c(names(OV_pred_combined),txt), col= my.colors, lty=1, ncol=2, cex=0.75, title=paste0("     ", "AUC"),
       y.intersp=0.75,x.intersp =0)

#SDE genes between high vs low prognostic score
OV_TCGA_GSE.Counts_All<- y.tcga.gse_cor_batch
head(OV_TCGA_GSE.Counts_All)[, 1:5]

##        TCGA-29-1692-01B-01R-0808-01 TCGA-13-1488-01A-01R-0477-01
## EEF1A1                     13.03324                     13.25729
## RPS3A                      12.92130                     12.53616
## RPS4X                      12.27286                     12.24965
## RPS27                      12.19733                     12.09124
## RPL39                      12.75188                     12.50652
## RPS23                      12.55604                     12.43668
##        TCGA-13-1483-01A-01R-0477-01 TCGA-24-2024-01A-02R-0668-01
## EEF1A1                     12.98532                     13.15260
## RPS3A                      12.81662                     12.92130
## RPS4X                      12.87456                     12.65051
## RPS27                      12.22907                     12.47443
## RPL39                      12.75188                     12.53887
## RPS23                      12.34817                     12.34817
##        TCGA-13-0908-01B-01R-0453-01
## EEF1A1                     12.94473
## RPS3A                      12.69725
## RPS4X                      12.42533
## RPS27                      12.29580
## RPL39                      12.75188
## RPS23                      12.05903

dim(OV_TCGA_GSE.Counts_All)

## [1] 10225  1016

OV_TCGA_GSE.Counts_All<- OV_TCGA_GSE.Counts_All[,rownames(OV.Surv_Score)]
design<- model.matrix(~OV.Surv_Score$batch+OV.Surv_Score$lamda.min_QHL, data=OV.Surv_Score)
dim(design)

## [1] 982   7

colnames(design)

## [1] "(Intercept)"                     "OV.Surv_Score$batchGSE14764"    
## [3] "OV.Surv_Score$batchGSE18520"     "OV.Surv_Score$batchGSE26193"    
## [5] "OV.Surv_Score$batchGSE26712"     "OV.Surv_Score$batchGSE63855"    
## [7] "OV.Surv_Score$lamda.min_QHLHigh"

OV_TCGA_GSE.Counts_All<- OV_TCGA_GSE.Counts_All[, rownames(design)]
fit<- lmFit(OV_TCGA_GSE.Counts_All, design)
fit.ebayes<- eBayes(fit)
summary(decideTests(fit.ebayes))

##        (Intercept) OV.Surv_Score$batchGSE14764 OV.Surv_Score$batchGSE18520
## Down             0                           0                         290
## NotSig           0                       10225                        9882
## Up           10225                           0                          53
##        OV.Surv_Score$batchGSE26193 OV.Surv_Score$batchGSE26712
## Down                             0                          11
## NotSig                       10225                       10192
## Up                               0                          22
##        OV.Surv_Score$batchGSE63855 OV.Surv_Score$lamda.min_QHLHigh
## Down                             0                            2453
## NotSig                       10225                            5784
## Up                               0                            1988

colnames(fit.ebayes)

## [1] "(Intercept)"                     "OV.Surv_Score$batchGSE14764"    
## [3] "OV.Surv_Score$batchGSE18520"     "OV.Surv_Score$batchGSE26193"    
## [5] "OV.Surv_Score$batchGSE26712"     "OV.Surv_Score$batchGSE63855"    
## [7] "OV.Surv_Score$lamda.min_QHLHigh"

topTable(fit.ebayes, coef=7)

##              logFC  AveExpr         t      P.Value    adj.P.Val        B
## DCHS1    0.5050088 5.661414  14.40849 7.957704e-43 8.136752e-39 86.28429
## PDGFRB   0.4682045 6.071064  12.60355 7.369996e-34 3.767910e-30 65.97661
## FSTL1    0.6923946 8.919050  12.24437 3.564924e-32 1.215045e-28 62.16388
## RPS6KA2  0.4686437 5.569783  12.03339 3.350834e-31 8.565570e-28 59.96194
## BTN3A3  -0.5787439 5.790756 -11.92691 1.027109e-30 1.998127e-27 58.86132
## PCOLCE   0.7639884 7.262415  11.91428 1.172495e-30 1.998127e-27 58.73124
## TCF4     0.6098881 7.725724  11.83212 2.766664e-30 4.041306e-27 57.88776
## VAT1     0.3891708 8.367604  11.77586 4.968365e-30 6.350191e-27 57.31260
## GJB1    -0.4791447 6.000926 -11.75952 5.886913e-30 6.669130e-27 57.14594
## ZEB1     0.5629934 5.032878  11.74964 6.522376e-30 6.669130e-27 57.04524

m<- match(rownames(fit.ebayes), egSymbol$symbol)
fit.ebayes[["geneinfo"]]<- list(
  gene_symbol=rownames(fit.ebayes),
  gene_id=egSymbol$gene_id[m]
)

results<-decideTests(fit.ebayes, coef=7)
summary(results[,7])

##        OV.Surv_Score$lamda.min_QHLHigh
## Down                              2453
## NotSig                            5784
## Up                                1988

n=summary(results)[1,7]+summary(results)[3,7]
results<- data.frame(results[,7])
m<- match(rownames(results),egSymbol$symbol)
results$gene_id<-egSymbol$gene_id[m]

topDEgenes<-topTable(fit.ebayes,coef=7,number=n)
m<- match(rownames(topDEgenes), egSymbol$symbol)
topDEgenes$gene_id<- egSymbol$gene_id[m]
#write.csv(topDEgenes, file= "topDEgenes_HighvsLowPrognosticScore2.csv")

par(mfrow=c(1,1))
plotMD(fit.ebayes, coef=7, status=decideTests(fit.ebayes[,7], lfc= log2(1.2)))
abline(h=log2(1.2), col= "green")
abline(h=-log2(1.2), col= "green")

par(mfrow=c(2,5))

for (i in 1:10) {
  plot(OV_TCGA_GSE.Counts_All[rownames(topDEgenes)[i], ]~OV.Surv_Score$lamda.min_QHL,
       xlab= "", ylab= "log2.expression", main= rownames(topDEgenes)[i])
  
}

go<-goana.MArrayLM(fit.ebayes, geneid = fit.ebayes$geneinfo$gene_id, species= "Hs")
topGO(go, ontology = "BP", sort="down")

##                                                    Term Ont    N  Up Down
## GO:0034660                      ncRNA metabolic process  BP  261  18  131
## GO:0140053                mitochondrial gene expression  BP   92   4   61
## GO:0006281                                   DNA repair  BP  360  30  159
## GO:0034470                             ncRNA processing  BP  224  16  112
## GO:0006974     cellular response to DNA damage stimulus  BP  577  72  222
## GO:0032543                    mitochondrial translation  BP   77   3   51
## GO:0006259                        DNA metabolic process  BP  605  78  228
## GO:0034641 cellular nitrogen compound metabolic process  BP 3938 724 1106
## GO:0045333                         cellular respiration  BP  135   6   73
## GO:0006396                               RNA processing  BP  568  74  213
## GO:0006399                       tRNA metabolic process  BP   80   1   49
## GO:0022613         ribonucleoprotein complex biogenesis  BP  283  22  120
## GO:0070125       mitochondrial translational elongation  BP   55   2   37
## GO:0022904         respiratory electron transport chain  BP   82   3   47
## GO:0042254                          ribosome biogenesis  BP  191  16   86
## GO:0006415                    translational termination  BP   65   2   40
## GO:0044786                   cell cycle DNA replication  BP   51   2   34
## GO:0006414                     translational elongation  BP   78   2   45
## GO:0070126      mitochondrial translational termination  BP   54   2   35
## GO:0006261                DNA-dependent DNA replication  BP  105   6   55
##                 P.Up       P.Down
## GO:0034660 1.0000000 1.442977e-20
## GO:0140053 0.9999956 7.262668e-18
## GO:0006281 1.0000000 9.732623e-18
## GO:0034470 1.0000000 1.366295e-17
## GO:0006974 0.9999981 1.035702e-15
## GO:0032543 0.9999894 3.994494e-15
## GO:0006259 0.9999956 6.135721e-15
## GO:0034641 0.9849578 1.408078e-14
## GO:0045333 0.9999999 3.770885e-14
## GO:0006396 0.9999873 9.390683e-14
## GO:0006399 1.0000000 1.188029e-12
## GO:0022613 1.0000000 3.504547e-12
## GO:0070125 0.9999050 1.202226e-11
## GO:0022904 0.9999960 9.281457e-11
## GO:0042254 0.9999941 1.087645e-10
## GO:0006415 0.9999874 1.148338e-10
## GO:0044786 0.9997887 1.209002e-10
## GO:0006414 0.9999991 1.728442e-10
## GO:0070126 0.9998839 2.108727e-10
## GO:0006261 0.9999874 2.694761e-10

topGO(go, ontology = "BP", sort="up")

##                                                     Term Ont    N   Up Down
## GO:0009653            anatomical structure morphogenesis  BP 1937  594  301
## GO:0072359                circulatory system development  BP  812  300  118
## GO:0006928     movement of cell or subcellular component  BP 1493  460  238
## GO:0001944                       vasculature development  BP  558  221   86
## GO:0001568                      blood vessel development  BP  537  214   82
## GO:0048856              anatomical structure development  BP 3912  981  724
## GO:0048731                            system development  BP 3309  851  588
## GO:0007275            multicellular organism development  BP 3629  917  663
## GO:0035295                              tube development  BP  790  278  124
## GO:0035239                            tube morphogenesis  BP  644  239  103
## GO:0040011                                    locomotion  BP 1310  405  209
## GO:0030198             extracellular matrix organization  BP  308  141   33
## GO:0043062          extracellular structure organization  BP  309  141   33
## GO:0032502                         developmental process  BP 4218 1030  821
## GO:0045229 external encapsulating structure organization  BP  310  141   33
## GO:0048514                    blood vessel morphogenesis  BP  481  190   72
## GO:0016477                                cell migration  BP 1097  349  169
## GO:0050793           regulation of developmental process  BP 1708  487  323
## GO:0048870                                 cell motility  BP 1177  362  191
## GO:0051674                          localization of cell  BP 1177  362  191
##                    P.Up    P.Down
## GO:0009653 2.423355e-40 1.0000000
## GO:0072359 3.072481e-34 1.0000000
## GO:0006928 1.798259e-30 1.0000000
## GO:0001944 5.858553e-30 0.9999999
## GO:0001568 1.907934e-29 0.9999999
## GO:0048856 2.176890e-29 1.0000000
## GO:0048731 7.195580e-28 1.0000000
## GO:0007275 7.716825e-28 1.0000000
## GO:0035295 1.402058e-27 1.0000000
## GO:0035239 1.942823e-27 0.9999999
## GO:0040011 8.347261e-27 1.0000000
## GO:0030198 1.419141e-26 1.0000000
## GO:0043062 2.128803e-26 1.0000000
## GO:0032502 2.214493e-26 1.0000000
## GO:0045229 3.184664e-26 1.0000000
## GO:0048514 1.335427e-25 0.9999998
## GO:0016477 2.470658e-25 1.0000000
## GO:0050793 1.183719e-23 1.0000000
## GO:0048870 2.657739e-23 1.0000000
## GO:0051674 2.657739e-23 1.0000000

#write.csv(topGO(go, ontology = "BP", number = 50L), file= "topGO_HighvsLowRiskGrop.csv")
#write.csv(topGO(go, ontology = "BP", sort="up", number = 20L), file= "topGO_HighvsLowRiskGrop_sortUP.csv")

kegg<- kegga.MArrayLM(fit.ebayes, geneid = fit.ebayes$geneinfo$gene_id, species= "Hs")
topKEGG(kegg) #TGF-B, PI3K-Akt,SignalingPathys_PluropotencyStemCells,FocalAdhesion-Allup

##                                                         Pathway    N  Up Down
## path:hsa04510                                    Focal adhesion  176  79   16
## path:hsa04612               Antigen processing and presentation   58   2   37
## path:hsa01100                                Metabolic pathways 1076 139  340
## path:hsa04360                                     Axon guidance  149  60   17
## path:hsa05206                               MicroRNAs in cancer  145  58   20
## path:hsa03030                                   DNA replication   32   0   23
## path:hsa00190                         Oxidative phosphorylation   88   6   45
## path:hsa05200                                Pathways in cancer  455 134   87
## path:hsa05330                               Allograft rejection   32   1   22
## path:hsa01200                                 Carbon metabolism   96   4   46
## path:hsa05205                           Proteoglycans in cancer  181  64   31
## path:hsa04932                 Non-alcoholic fatty liver disease  135  22   59
## path:hsa04512                          ECM-receptor interaction   76  34    4
## path:hsa05208 Chemical carcinogenesis - reactive oxygen species  165  29   68
## path:hsa05332                         Graft-versus-host disease   32   2   21
## path:hsa05415                           Diabetic cardiomyopathy  154  29   64
## path:hsa04520                                 Adherens junction   63  29    7
## path:hsa04010                            MAPK signaling pathway  259  82   35
## path:hsa05414                            Dilated cardiomyopathy   83  35    5
## path:hsa04151                        PI3K-Akt signaling pathway  296  91   40
##                       P.Up       P.Down
## path:hsa04510 8.679302e-15 9.999999e-01
## path:hsa04612 9.999480e-01 1.272035e-10
## path:hsa01100 1.000000e+00 1.124493e-09
## path:hsa04360 2.917801e-09 9.999702e-01
## path:hsa05206 7.172715e-09 9.992293e-01
## path:hsa03030 1.000000e+00 1.392580e-08
## path:hsa00190 9.997822e-01 2.956985e-08
## path:hsa05200 1.011437e-07 9.953293e-01
## path:hsa05330 9.990221e-01 1.044251e-07
## path:hsa01200 9.999979e-01 2.570191e-07
## path:hsa05205 3.022980e-07 9.904237e-01
## path:hsa04932 8.512767e-01 3.263393e-07
## path:hsa04512 4.239156e-07 9.999979e-01
## path:hsa05208 7.580725e-01 6.254447e-07
## path:hsa05332 9.914412e-01 6.821026e-07
## path:hsa05415 6.090733e-01 9.369385e-07
## path:hsa04520 1.445486e-06 9.970510e-01
## path:hsa04010 1.482491e-06 9.999923e-01
## path:hsa05414 1.599650e-06 9.999974e-01
## path:hsa04151 1.651256e-06 9.999981e-01

#CellCycle, DNAreplication,BaseExcisionrepair-Alldown
#write.csv(topKEGG(kegg, number = 50L), file= "topKEGG_HighvsLowRiskGrop.csv")

###############################################
#MANUALLY  assigning KEGGPATWAYS#
#BARCODE PLOT#
###############################################
keggpathways<- getKEGGPathwayNames("hsa")
x<- getGeneKEGGLinks("hsa", convert = T)
par(mfrow= c(2,3))
#1. TGF-B
x.1<- x[x$PathwayID=="path:hsa04350",]
idfound<-topDEgenes$gene_id %in% x.1$GeneID
table(idfound)

## idfound
## FALSE  TRUE 
##  4390    51

barcodeplot(statistics = topDEgenes$logFC, index= idfound,
            xlab= "logFC", main="hsa04350:TGF-beta signaling pathway",
            cex.main=1) # p.up of TGF-b
#2.PI3K-Akt signaling
x.1<- x[x$PathwayID=="path:hsa04151",]
idfound<- topDEgenes$gene_id %in% x.1$GeneID
table(idfound)

## idfound
## FALSE  TRUE 
##  4310   131

barcodeplot(statistics = topDEgenes$logFC, index= idfound,
            xlab= "logFC", main="hsa04151:PI3K-AktSignalingPathway",
            cex.main=1) #p.up more significant


#3.path:hsa04550 Signaling pathways regulating pluripotency of stem cells
x.1<- x[x$PathwayID=="path:hsa04550",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)

## idfound
## FALSE  TRUE 
##  4370    71

barcodeplot(statistics = topDEgenes$logFC, index= idfound,
            xlab= "logFC", main="hsa04550:StemCellGenes",
            cex.main=1) # p.up
#4.path:hsa04110 Cell cycle
x.1<- x[x$PathwayID=="path:hsa04110",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)

## idfound
## FALSE  TRUE 
##  4384    57

barcodeplot(statistics = topDEgenes$logFC, index= idfound,
            xlab= "logFC", main="hsa04110:Cell cycle",
            cex.main=1) # p.down

#5.path:hsa03030_DNA replication
x.1<- x[x$PathwayID=="path:hsa03030",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)

## idfound
## FALSE  TRUE 
##  4418    23

barcodeplot(statistics = topDEgenes$logFC, index= idfound,
            xlab= "logFC", main="hsa03030:DNA replication",
            cex.main=1) # p.down

#6.path:hsa03410_Base excision repair
x.1<- x[x$PathwayID=="path:hsa03410",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)

## idfound
## FALSE  TRUE 
##  4421    20

barcodeplot(statistics = topDEgenes$logFC, index= idfound,
            xlab= "logFC", main="hsa03410:Base excision repair",
            cex.main=1) # p.down

#7.path:hsa04620_TLR pathways
x.1<- x[x$PathwayID=="path:hsa04620",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)

## idfound
## FALSE  TRUE 
##  4399    42

barcodeplot(statistics = topDEgenes$logFC, index= idfound,
            xlab= "logFC", main="hsa04620:TLR pathways",
            cex.main=1) # p.down

#8.path:hsa04660_TCR Signaling pathways
x.1<- x[x$PathwayID=="path:hsa04660",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)

## idfound
## FALSE  TRUE 
##  4394    47

barcodeplot(statistics = topDEgenes$logFC, index= idfound,
            xlab= "logFC", main="hsa04660:TCR Signaling pathways",
            cex.main=1) # no.change

#9.path:hsa04657_IL-17Signaling pathways
x.1<- x[x$PathwayID=="path:hsa04657",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)

## idfound
## FALSE  TRUE 
##  4405    36

barcodeplot(statistics = topDEgenes$logFC, index= idfound,
            xlab= "logFC", main="hsa04657:IL-17 Signaling pathways",
            cex.main=1)

#10.path:hsa04218_Cellular senesence Signaling pathways
x.1<- x[x$PathwayID=="path:hsa04218",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)

## idfound
## FALSE  TRUE 
##  4372    69

barcodeplot(statistics = topDEgenes$logFC, index= idfound,
            xlab= "logFC", main="hsa04218:CellularSenesence Signaling pathways",
            cex.main=1)

#11.path:hsa04215_ApoptosisSignaling pathways
x.1<- x[x$PathwayID=="path:hsa04215",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)

## idfound
## FALSE  TRUE 
##  4430    11

barcodeplot(statistics = topDEgenes$logFC, index= idfound,
            xlab= "logFC", main="hsa04215:Apoptosis Signaling pathways",
            cex.main=1)

#12.path:hsa04140_Autophagy Signaling pathways
x.1<- x[x$PathwayID=="path:hsa04140",]
idfound<- topDEgenes$gene_id%in% x.1$GeneID
table(idfound)

## idfound
## FALSE  TRUE 
##  4390    51

barcodeplot(statistics = topDEgenes$logFC, index= idfound,
            xlab= "logFC", main="hsa04140:Autophagy Signaling pathways",
            cex.main=1)

#arranging columns based on mPS score
dim(OV_TCGA_GSE.Counts_All)

## [1] 10225   982

OV.Surv_Score<-OV.Surv_Score[order(OV.Surv_Score$lamda.min, decreasing=T),]
OV_TCGA_GSE.Counts_All<- OV_TCGA_GSE.Counts_All[, rownames(OV.Surv_Score)]

annotation_col = data.frame(
  mPS=as.numeric(OV.Surv_Score$lamda.min),
  mPS_Q=factor(OV.Surv_Score$lamda.min_Q),
  mPS_QHL=factor(OV.Surv_Score$lamda.min_QHL)
)
annotation_col$mPS

##   [1]  1.1782265834  0.9742625355  0.9621407932  0.9592872375  0.9551706872
##   [6]  0.9530006589  0.8664655442  0.8637098494  0.8609184925  0.8508249698
##  [11]  0.8426213875  0.8405285598  0.8293429513  0.8210808851  0.8075573060
##  [16]  0.8050243967  0.8033739781  0.8001120326  0.8000884257  0.7958043464
##  [21]  0.7890260585  0.7845767088  0.7822693028  0.7801785385  0.7581872692
##  [26]  0.7562607026  0.7486241851  0.7485199273  0.7481539917  0.7445311485
##  [31]  0.7419727410  0.7418344350  0.7404514046  0.7317391610  0.7306239520
##  [36]  0.7304993633  0.7287573922  0.7269025495  0.7242863023  0.7178723458
##  [41]  0.7175170882  0.7171148687  0.7154523345  0.7154318378  0.7101594405
##  [46]  0.7058770818  0.7039343706  0.6933747102  0.6824321402  0.6678119177
##  [51]  0.6649716019  0.6601878578  0.6555502043  0.6474825333  0.6451732009
##  [56]  0.6445435252  0.6422830381  0.6419586328  0.6399693274  0.6362359633
##  [61]  0.6342085762  0.6293001504  0.6285163848  0.6281869398  0.6265131396
##  [66]  0.6201275943  0.6192512668  0.6180698702  0.6129249076  0.6127998098
##  [71]  0.6117828673  0.6105292611  0.6088437753  0.6082959220  0.6081110395
##  [76]  0.6072048551  0.6063685711  0.6050582492  0.6044847508  0.6030332538
##  [81]  0.6022108863  0.5993433460  0.5989853850  0.5975445488  0.5926524309
##  [86]  0.5925475353  0.5918906730  0.5913701551  0.5904267729  0.5890119021
##  [91]  0.5885743798  0.5882741846  0.5881728951  0.5880565575  0.5873865229
##  [96]  0.5850336134  0.5829824647  0.5816431534  0.5787497813  0.5773805841
## [101]  0.5766679791  0.5765878886  0.5747862989  0.5738546207  0.5725458721
## [106]  0.5718965878  0.5718857869  0.5714489756  0.5691909828  0.5687644053
## [111]  0.5656885659  0.5650258158  0.5643565123  0.5593352795  0.5570774937
## [116]  0.5551597430  0.5539041934  0.5520639655  0.5502657835  0.5500487490
## [121]  0.5490529163  0.5484574319  0.5466731068  0.5430740389  0.5430172469
## [126]  0.5429670483  0.5405773680  0.5401692017  0.5381952076  0.5379352558
## [131]  0.5373851478  0.5361231752  0.5355842209  0.5349885335  0.5336282241
## [136]  0.5328966781  0.5319542200  0.5319178929  0.5318616820  0.5317742126
## [141]  0.5314508401  0.5307940372  0.5295698003  0.5293372402  0.5290977627
## [146]  0.5278213711  0.5265099210  0.5222887724  0.5185870715  0.5178240827
## [151]  0.5166211849  0.5144115237  0.5138326542  0.5136979132  0.5119483921
## [156]  0.5103119617  0.5091986500  0.5079900135  0.5050182733  0.5028266991
## [161]  0.5017024148  0.5005259562  0.4997734866  0.4996141700  0.4991811093
## [166]  0.4987935854  0.4987779470  0.4963038119  0.4959437350  0.4859578066
## [171]  0.4850107853  0.4830489299  0.4828302969  0.4826112281  0.4824091317
## [176]  0.4816926526  0.4803047507  0.4794053655  0.4793718578  0.4756748235
## [181]  0.4742116266  0.4725935610  0.4724100626  0.4723048476  0.4718440373
## [186]  0.4715170909  0.4701706358  0.4694613295  0.4676392077  0.4661387553
## [191]  0.4641708160  0.4635599935  0.4634056593  0.4633440330  0.4612194892
## [196]  0.4590667252  0.4581887523  0.4580726673  0.4575887418  0.4575857330
## [201]  0.4572924855  0.4570094219  0.4560481252  0.4548299136  0.4544355969
## [206]  0.4539856666  0.4537488677  0.4529844193  0.4515764828  0.4512234128
## [211]  0.4511787794  0.4484552982  0.4476079640  0.4448422181  0.4445005514
## [216]  0.4429727198  0.4427721652  0.4418358988  0.4400918312  0.4397112323
## [221]  0.4390059254  0.4380512874  0.4373172946  0.4354654145  0.4353000973
## [226]  0.4352011221  0.4343031083  0.4336350561  0.4323548273  0.4316327800
## [231]  0.4313418818  0.4308574779  0.4308174735  0.4303962872  0.4280892049
## [236]  0.4266706569  0.4255468681  0.4235406649  0.4228761623  0.4221363998
## [241]  0.4215576305  0.4175716593  0.4170354214  0.4158458070  0.4155858703
## [246]  0.4153999868  0.4141239121  0.4118329821  0.4112185235  0.4107497399
## [251]  0.4104352029  0.4102810455  0.4096366582  0.4094996078  0.4087215298
## [256]  0.4087057321  0.4081298597  0.4072016433  0.4068512515  0.4068489029
## [261]  0.4067677094  0.4059307882  0.4055276631  0.4054963963  0.4034890596
## [266]  0.4030211808  0.4017713988  0.4012020017  0.4011870178  0.3994539376
## [271]  0.3992454119  0.3989935048  0.3982935399  0.3965768268  0.3955300521
## [276]  0.3953870621  0.3950829371  0.3949347663  0.3934643489  0.3933938929
## [281]  0.3914450409  0.3902689134  0.3886775995  0.3882211471  0.3879460956
## [286]  0.3869450471  0.3862530664  0.3860074031  0.3836490601  0.3833877100
## [291]  0.3832731698  0.3825307235  0.3822386197  0.3820319150  0.3818813415
## [296]  0.3815307093  0.3810986695  0.3807658145  0.3807520660  0.3794541794
## [301]  0.3790954108  0.3782627257  0.3782590847  0.3781014967  0.3779839350
## [306]  0.3773111280  0.3764109491  0.3760185629  0.3752525903  0.3739058880
## [311]  0.3736589315  0.3736169488  0.3735514257  0.3734887715  0.3717767377
## [316]  0.3715847206  0.3703233159  0.3692072431  0.3684812716  0.3679996556
## [321]  0.3678702689  0.3676865171  0.3673917597  0.3666426633  0.3654533052
## [326]  0.3649920706  0.3647268009  0.3638990641  0.3632316025  0.3627744926
## [331]  0.3600263691  0.3599526150  0.3597232356  0.3594246610  0.3592131577
## [336]  0.3577666659  0.3571680908  0.3571137952  0.3563705475  0.3554120480
## [341]  0.3541349938  0.3538142786  0.3531403537  0.3530370694  0.3524030733
## [346]  0.3519880801  0.3519118485  0.3518324038  0.3508916393  0.3506828200
## [351]  0.3506552505  0.3504533290  0.3480151041  0.3471324204  0.3468948498
## [356]  0.3468568199  0.3465333347  0.3459012782  0.3458432333  0.3453976818
## [361]  0.3446613519  0.3442303207  0.3439475344  0.3437635184  0.3429148530
## [366]  0.3424796563  0.3410600329  0.3403843921  0.3396670872  0.3394474883
## [371]  0.3394289424  0.3393898257  0.3377579706  0.3366424026  0.3352400057
## [376]  0.3346416795  0.3345611215  0.3341220642  0.3327175920  0.3324802897
## [381]  0.3309794466  0.3305897174  0.3302147012  0.3295497088  0.3291691584
## [386]  0.3288165622  0.3285667281  0.3267929143  0.3265659436  0.3246935004
## [391]  0.3243623285  0.3235561808  0.3234820316  0.3233615260  0.3233175588
## [396]  0.3203156942  0.3196572834  0.3191573189  0.3182451357  0.3173080572
## [401]  0.3172424153  0.3159617767  0.3152998070  0.3151400913  0.3141587611
## [406]  0.3134587381  0.3124064712  0.3115831421  0.3112408764  0.3109271059
## [411]  0.3106064723  0.3099551224  0.3080008145  0.3079944878  0.3078945376
## [416]  0.3073779401  0.3056495116  0.3037639763  0.3032360067  0.3023203606
## [421]  0.3022060230  0.3021603335  0.3017725607  0.3014043124  0.3011805622
## [426]  0.3004068446  0.3002473509  0.2961302898  0.2959678110  0.2958015362
## [431]  0.2946017509  0.2941873294  0.2939715246  0.2928123101  0.2927168366
## [436]  0.2920608676  0.2911988490  0.2901967376  0.2881088746  0.2873055366
## [441]  0.2868730541  0.2859093643  0.2854452532  0.2851032628  0.2846259915
## [446]  0.2824314527  0.2820866142  0.2811931742  0.2811252443  0.2807221393
## [451]  0.2801989369  0.2801318649  0.2800262722  0.2791501390  0.2784189915
## [456]  0.2778449303  0.2773333477  0.2770937472  0.2761998872  0.2722656240
## [461]  0.2714833909  0.2703430516  0.2702892342  0.2702392658  0.2701327759
## [466]  0.2700474172  0.2696277359  0.2693852955  0.2683426366  0.2668735312
## [471]  0.2667270439  0.2663577982  0.2652907635  0.2652382522  0.2638999221
## [476]  0.2633712510  0.2628675428  0.2602835749  0.2601046509  0.2597847119
## [481]  0.2595501765  0.2593934274  0.2581736557  0.2581700590  0.2574889206
## [486]  0.2553922086  0.2552797002  0.2549838388  0.2529589855  0.2519047647
## [491]  0.2507572508  0.2498272037  0.2485062619  0.2478398067  0.2477550553
## [496]  0.2472787331  0.2457522294  0.2447098267  0.2446114379  0.2445548336
## [501]  0.2444888416  0.2440945563  0.2419831877  0.2417273483  0.2406980543
## [506]  0.2405293424  0.2404364954  0.2400105158  0.2397433814  0.2396799433
## [511]  0.2382119441  0.2380275501  0.2376628436  0.2360454422  0.2354824450
## [516]  0.2339077113  0.2337601801  0.2334197248  0.2325396785  0.2313509834
## [521]  0.2311853123  0.2311071399  0.2310550612  0.2310401619  0.2297069582
## [526]  0.2294211524  0.2290084683  0.2280410566  0.2269880585  0.2254537619
## [531]  0.2246044709  0.2236113541  0.2233473532  0.2233207730  0.2232793752
## [536]  0.2230380883  0.2224452549  0.2218739812  0.2218057188  0.2216892212
## [541]  0.2210514461  0.2210157024  0.2209638644  0.2198697887  0.2198130388
## [546]  0.2195385209  0.2180797252  0.2173512274  0.2170133835  0.2161431930
## [551]  0.2153776908  0.2132494608  0.2118165565  0.2114983588  0.2092178463
## [556]  0.2090544346  0.2084371052  0.2079274982  0.2073648331  0.2069820739
## [561]  0.2062634191  0.2062403258  0.2033864034  0.2020957881  0.2015373910
## [566]  0.2014335942  0.2012669541  0.1994517868  0.1979538499  0.1978013529
## [571]  0.1974763681  0.1972380094  0.1957085339  0.1956035912  0.1944864763
## [576]  0.1944140178  0.1943798147  0.1942104725  0.1932615990  0.1929491348
## [581]  0.1929355766  0.1919382207  0.1917929546  0.1898447102  0.1893171086
## [586]  0.1888911747  0.1875123320  0.1870744219  0.1870583993  0.1859666269
## [591]  0.1856449725  0.1853272695  0.1852725214  0.1848322410  0.1845264178
## [596]  0.1834193621  0.1834113117  0.1827933313  0.1803753074  0.1800183562
## [601]  0.1795195026  0.1789263522  0.1787582773  0.1765957710  0.1765020675
## [606]  0.1758499757  0.1747257478  0.1741553941  0.1736273185  0.1734752927
## [611]  0.1734384821  0.1711977842  0.1690217946  0.1682364353  0.1675652039
## [616]  0.1662936957  0.1651768534  0.1650765745  0.1648855935  0.1643299132
## [621]  0.1638131663  0.1636840905  0.1635583980  0.1630589553  0.1623780875
## [626]  0.1602825296  0.1600808687  0.1600333310  0.1590259954  0.1590215829
## [631]  0.1580714900  0.1576159928  0.1574214488  0.1572622911  0.1566467251
## [636]  0.1559315165  0.1550861223  0.1547064436  0.1537414799  0.1536031362
## [641]  0.1530932318  0.1519378937  0.1518577163  0.1516915155  0.1512544732
## [646]  0.1480561071  0.1477424536  0.1475984849  0.1464254703  0.1464135710
## [651]  0.1456580059  0.1435091803  0.1433036321  0.1427672539  0.1410487269
## [656]  0.1409365537  0.1408916529  0.1404887763  0.1387920976  0.1383855160
## [661]  0.1380545992  0.1378195993  0.1376661003  0.1372128362  0.1371363326
## [666]  0.1364831519  0.1353430826  0.1347984870  0.1346450284  0.1346137367
## [671]  0.1319665840  0.1318896884  0.1293816370  0.1288214984  0.1264599301
## [676]  0.1263151870  0.1259975951  0.1258695008  0.1249112401  0.1237706912
## [681]  0.1237040504  0.1233057562  0.1230431818  0.1229420393  0.1223853892
## [686]  0.1214564782  0.1208880276  0.1202206609  0.1192149512  0.1188968485
## [691]  0.1183535491  0.1169367582  0.1163529252  0.1162713238  0.1158727264
## [696]  0.1149517025  0.1148784291  0.1148705814  0.1132989708  0.1132538917
## [701]  0.1119466917  0.1117838990  0.1117306701  0.1108235933  0.1103070587
## [706]  0.1100837096  0.1095926184  0.1094195839  0.1091692897  0.1090869407
## [711]  0.1088843278  0.1084630989  0.1084142588  0.1078661481  0.1074334108
## [716]  0.1074143757  0.1071578158  0.1070422766  0.1068507532  0.1052474774
## [721]  0.1044367158  0.1041347254  0.1039287563  0.1032474028  0.1024844361
## [726]  0.1000595662  0.0987138687  0.0985269905  0.0984799133  0.0973250663
## [731]  0.0967031294  0.0944285930  0.0931377582  0.0912339857  0.0910567377
## [736]  0.0901650309  0.0889590850  0.0872514633  0.0866660134  0.0865855071
## [741]  0.0858351603  0.0857888447  0.0843083987  0.0842533047  0.0835931077
## [746]  0.0831015207  0.0830587188  0.0828541072  0.0816691197  0.0814156505
## [751]  0.0814129053  0.0813805535  0.0797736454  0.0797270423  0.0777693303
## [756]  0.0777623577  0.0776816297  0.0771878401  0.0764744986  0.0753210353
## [761]  0.0703215897  0.0691591779  0.0690544048  0.0671701843  0.0660158292
## [766]  0.0656250837  0.0655396056  0.0654031189  0.0649682570  0.0637908306
## [771]  0.0623014205  0.0614831595  0.0606548649  0.0597958071  0.0593417330
## [776]  0.0585614464  0.0584453537  0.0568748599  0.0561119690  0.0557193261
## [781]  0.0554110236  0.0542831342  0.0542630649  0.0531436630  0.0522198132
## [786]  0.0513444564  0.0493307344  0.0469224200  0.0468711763  0.0465352407
## [791]  0.0451358513  0.0450907913  0.0448457841  0.0446400743  0.0445159673
## [796]  0.0413595538  0.0391529770  0.0373120770  0.0366231918  0.0336880040
## [801]  0.0316459624  0.0312996173  0.0310675636  0.0295148959  0.0289543454
## [806]  0.0281205702  0.0275728869  0.0254882685  0.0250705334  0.0237867918
## [811]  0.0233783422  0.0221207239  0.0219986556  0.0215581695  0.0207563122
## [816]  0.0202852678  0.0158214884  0.0134469372  0.0124686402  0.0122678940
## [821]  0.0118137556  0.0109645494  0.0068609495  0.0055530162  0.0047741233
## [826]  0.0038125624  0.0031415837  0.0012224256  0.0005809713  0.0005123169
## [831] -0.0008166294 -0.0013639825 -0.0025423853 -0.0026929558 -0.0039823150
## [836] -0.0047527818 -0.0051450141 -0.0065122242 -0.0069104543 -0.0071005801
## [841] -0.0072400297 -0.0075342198 -0.0105540616 -0.0106323021 -0.0111242360
## [846] -0.0119398724 -0.0123694113 -0.0124408091 -0.0124975564 -0.0186264932
## [851] -0.0214997762 -0.0217440427 -0.0218670944 -0.0263171488 -0.0341890993
## [856] -0.0342667284 -0.0344997323 -0.0348272237 -0.0367457580 -0.0377356517
## [861] -0.0386110405 -0.0387889615 -0.0391235913 -0.0407804967 -0.0413084844
## [866] -0.0434121884 -0.0455470783 -0.0462323217 -0.0480155974 -0.0485062684
## [871] -0.0494706903 -0.0497303958 -0.0501862179 -0.0511803119 -0.0525105488
## [876] -0.0542340685 -0.0557802857 -0.0565144187 -0.0569201544 -0.0570846436
## [881] -0.0572457738 -0.0594330054 -0.0600589237 -0.0612300420 -0.0612376663
## [886] -0.0636715033 -0.0647785937 -0.0664803547 -0.0691765469 -0.0705784689
## [891] -0.0707420002 -0.0714183346 -0.0723145022 -0.0730460766 -0.0731578080
## [896] -0.0736467685 -0.0744905696 -0.0745510193 -0.0749353628 -0.0773518204
## [901] -0.0786399966 -0.0789874393 -0.0806376363 -0.0835055100 -0.0847773610
## [906] -0.0911112395 -0.0928303867 -0.0965177754 -0.0973519238 -0.0975965277
## [911] -0.0977497999 -0.0978402717 -0.0992522951 -0.1026482087 -0.1040928457
## [916] -0.1049207516 -0.1054756439 -0.1055289357 -0.1055351273 -0.1067349179
## [921] -0.1110542638 -0.1112869194 -0.1113882814 -0.1121190257 -0.1189274026
## [926] -0.1194740088 -0.1206689988 -0.1207859942 -0.1248770292 -0.1257310468
## [931] -0.1269940186 -0.1290933468 -0.1341899745 -0.1379386084 -0.1386841426
## [936] -0.1394628297 -0.1436112476 -0.1478528240 -0.1493875184 -0.1501635001
## [941] -0.1616904918 -0.1658234676 -0.1668788975 -0.1690534325 -0.1698156102
## [946] -0.1703117193 -0.1716959622 -0.1734493967 -0.1790781758 -0.1895386836
## [951] -0.1961735600 -0.2014447238 -0.2023641429 -0.2048938819 -0.2071694286
## [956] -0.2077327065 -0.2202263256 -0.2225239289 -0.2265855710 -0.2275710570
## [961] -0.2313608439 -0.2366489703 -0.2433134489 -0.2561980809 -0.2564799647
## [966] -0.2585569727 -0.2707344269 -0.2792153490 -0.2949213840 -0.3051361496
## [971] -0.3127248418 -0.3169822939 -0.3172382123 -0.3277747027 -0.3414482559
## [976] -0.3481379614 -0.3752930118 -0.3766705430 -0.3924142210 -0.4471581365
## [981] -0.5242650558 -0.5273423988

rownames(annotation_col)<- rownames(OV.Surv_Score)

# Specify colors
ann_colors = list(
  mPS_QHL= c("Low"="black", "High"="red"),
  mPS_Q= c("Q1"="black","Q2"="yellow", "Q3"="orange", "Q4"= "red")
)

topDEgenes<- topDEgenes[order(topDEgenes$logFC, decreasing=T),]

dim(topDEgenes)

## [1] 4441    7

summary(decideTests(fit.ebayes))

##        (Intercept) OV.Surv_Score$batchGSE14764 OV.Surv_Score$batchGSE18520
## Down             0                           0                         290
## NotSig           0                       10225                        9882
## Up           10225                           0                          53
##        OV.Surv_Score$batchGSE26193 OV.Surv_Score$batchGSE26712
## Down                             0                          11
## NotSig                       10225                       10192
## Up                               0                          22
##        OV.Surv_Score$batchGSE63855 OV.Surv_Score$lamda.min_QHLHigh
## Down                             0                            2453
## NotSig                       10225                            5784
## Up                               0                            1988

pheatmap(OV_TCGA_GSE.Counts_All[rownames(topDEgenes)[c(1:20, nrow(topDEgenes):(nrow(topDEgenes)-20))],],
         color=colfunc(ncol(OV_TCGA_GSE.Counts_All)),
         scale= "row", clustering_method = "ward.D2",
         cluster_cols = F,cluster_rows = F,
         annotation_col = annotation_col, annotation_colors = ann_colors,
         show_colnames = F,
         main= "Top20DE genes good vs poor prognosis")

#MDSplot


par(mfrow= c(1,2))
my.colors<- c("black","blue", "green","red")
set.seed(123)
plotMDS(OV_TCGA_GSE.Counts_All[rownames(topDEgenes),], col= my.colors[c(1,4)][OV.Surv_Score$lamda.min_QHL], pch= 20)
legend("topleft", legend= levels(OV.Surv_Score$lamda.min_QHL), col=my.colors[c(1,4)], pch=19 )

set.seed(123)
plotMDS(OV_TCGA_GSE.Counts_All[rownames(topDEgenes),], col= my.colors[1:4][OV.Surv_Score$lamda.min_Q], pch= 20)
legend("topleft", legend= levels(OV.Surv_Score$lamda.min_Q), col=my.colors[1:4], pch=19 )

sessionInfo()

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] TCGAbiolinks_2.20.1         RColorBrewer_1.1-2         
##  [3] SummarizedExperiment_1.22.0 GenomicRanges_1.44.0       
##  [5] GenomeInfoDb_1.28.4         MatrixGenerics_1.4.3       
##  [7] matrixStats_0.61.0          pheatmap_1.0.12            
##  [9] survivalROC_1.0.3           ROCR_1.0-11                
## [11] glmnet_4.1-3                Matrix_1.4-0               
## [13] org.Hs.eg.db_3.13.0         AnnotationDbi_1.54.1       
## [15] IRanges_2.26.0              S4Vectors_0.30.2           
## [17] Biobase_2.52.0              BiocGenerics_0.38.0        
## [19] limma_3.48.3                survminer_0.4.9            
## [21] ggpubr_0.4.0                ggplot2_3.3.5              
## [23] survival_3.2-13            
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-2            ggsignif_0.6.3             
##   [3] ellipsis_0.3.2              XVector_0.32.0             
##   [5] gridtext_0.1.4              ggtext_0.1.1               
##   [7] rstudioapi_0.13             farver_2.1.0               
##   [9] bit64_4.0.5                 fansi_1.0.2                
##  [11] xml2_1.3.3                  R.methodsS3_1.8.1          
##  [13] codetools_0.2-18            splines_4.1.0              
##  [15] cachem_1.0.6                knitr_1.37                 
##  [17] jsonlite_1.7.3              broom_0.7.12               
##  [19] km.ci_0.5-2                 dbplyr_2.1.1               
##  [21] GO.db_3.13.0                R.oo_1.24.0                
##  [23] png_0.1-7                   readr_2.1.2                
##  [25] compiler_4.1.0              httr_1.4.2                 
##  [27] backports_1.4.1             assertthat_0.2.1           
##  [29] fastmap_1.1.0               cli_3.1.1                  
##  [31] prettyunits_1.1.1           htmltools_0.5.2            
##  [33] tools_4.1.0                 gtable_0.3.0               
##  [35] glue_1.6.1                  GenomeInfoDbData_1.2.6     
##  [37] dplyr_1.0.7                 rappdirs_0.3.3             
##  [39] Rcpp_1.0.8                  carData_3.0-5              
##  [41] jquerylib_0.1.4             vctrs_0.3.8                
##  [43] Biostrings_2.60.2           iterators_1.0.13           
##  [45] xfun_0.29                   stringr_1.4.0              
##  [47] rvest_1.0.2                 lifecycle_1.0.1            
##  [49] XML_3.99-0.8                statmod_1.4.36             
##  [51] rstatix_0.7.0               zlibbioc_1.38.0            
##  [53] zoo_1.8-9                   scales_1.1.1               
##  [55] vroom_1.5.7                 hms_1.1.1                  
##  [57] curl_4.3.2                  yaml_2.2.2                 
##  [59] memoise_2.0.1               gridExtra_2.3              
##  [61] downloader_0.4              KMsurv_0.1-5               
##  [63] sass_0.4.0                  biomaRt_2.48.3             
##  [65] stringi_1.7.6               RSQLite_2.2.9              
##  [67] highr_0.9                   foreach_1.5.1              
##  [69] filelock_1.0.2              shape_1.4.6                
##  [71] rlang_1.0.0                 pkgconfig_2.0.3            
##  [73] bitops_1.0-7                evaluate_0.14              
##  [75] TCGAbiolinksGUI.data_1.12.0 lattice_0.20-45            
##  [77] purrr_0.3.4                 labeling_0.4.2             
##  [79] cowplot_1.1.1               bit_4.0.4                  
##  [81] tidyselect_1.1.1            plyr_1.8.6                 
##  [83] magrittr_2.0.2              R6_2.5.1                   
##  [85] generics_0.1.1              DelayedArray_0.18.0        
##  [87] DBI_1.1.2                   pillar_1.6.5               
##  [89] withr_2.4.3                 KEGGREST_1.32.0            
##  [91] abind_1.4-5                 RCurl_1.98-1.5             
##  [93] tibble_3.1.6                crayon_1.4.2               
##  [95] car_3.0-12                  survMisc_0.5.5             
##  [97] utf8_1.2.2                  BiocFileCache_2.0.0        
##  [99] tzdb_0.2.0                  rmarkdown_2.11             
## [101] progress_1.2.2              grid_4.1.0                 
## [103] data.table_1.14.2           blob_1.2.2                 
## [105] digest_0.6.29               xtable_1.8-4               
## [107] tidyr_1.1.4                 R.utils_2.11.0             
## [109] munsell_0.5.0               bslib_0.3.1

CoxglmnetPredictorgenes_Final_GSE26193.R

SID-MBP-R

2022-05-30