# validation

library(readxl)

# Load every sheet of the workbook into a list named by sheet.
# The path is defined once so the sheet listing and the per-sheet reads are
# guaranteed to target the same file.
xlsx_path <- "/Users/ozlemtuna/Downloads/48_Schabath_2016-2.xlsx"

sheet_names <- excel_sheets(xlsx_path)

data_list <- lapply(sheet_names, function(sheet) {
  read_excel(xlsx_path, sheet = sheet)
})

names(data_list) <- sheet_names

# Sheets are picked by position: the first feeds `patients`, the second
# `sample`, the third the expression table `exp`.
stopifnot(length(data_list) >= 3L)
patients <- data_list[[1]]
sample <- data_list[[2]]
exp <- data_list[[3]]

# Use the Entrez IDs as row names and keep only the remaining columns.
# The ID column is removed by name rather than by position, and its presence
# is checked first: with a missing column `rownames<-` would silently receive
# NULL and an unrelated first column would be dropped instead.
exp <- as.data.frame(exp)
stopifnot("EntrezID" %in% colnames(exp))
rownames(exp) <- exp[["EntrezID"]]
exp <- exp[, colnames(exp) != "EntrezID", drop = FALSE]

library(biomaRt)
# Annotate the expression rows (Entrez IDs) with HGNC symbols from Ensembl
# BioMart and keep only the genes that received a symbol.
mart <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
entrez_ids <- rownames(exp)
genes <- getBM(attributes = c("entrezgene_id", "hgnc_symbol"),
               filters = "entrezgene_id",
               values = entrez_ids,
               mart = mart)
colnames(genes) <- c("EntrezID", "GeneSymbol")

# match() only uses the first row it finds for each Entrez ID. Rows with a
# blank or missing symbol are therefore removed before matching; otherwise a
# gene whose first returned row is blank would be discarded even though a
# later row carries a valid symbol.
# NOTE(review): compared with matching on the unfiltered table this can only
# retain more genes, which also shifts the pooled ranks computed downstream.
genes_with_symbol <- genes[!is.na(genes$GeneSymbol) & genes$GeneSymbol != "", ,
                           drop = FALSE]
exp$GeneSymbol <- genes_with_symbol$GeneSymbol[
  match(rownames(exp), genes_with_symbol$EntrezID)
]
expa <- exp[!is.na(exp$GeneSymbol) & exp$GeneSymbol != "", , drop = FALSE]

# make.names() turns symbols into syntactically valid, unique R names, so
# characters such as "-" become "." and repeated symbols get a numeric suffix.
rownames(expa) <- make.names(expa$GeneSymbol, unique = TRUE)
expa$GeneSymbol <- NULL

# Replace expression values by zero-based global ranks: all values of the
# matrix (every gene in every sample) are pooled and ranked together, ties
# share the lowest rank, and the smallest value ends up at 0.
exps <- as.matrix(expa)
ranked_vector <- rank(as.vector(exps), ties.method = "min")

# rank() keeps NA for missing inputs, so the minimum has to skip them; a plain
# min() would return NA and turn every rank into NA.
ranked_vector <- ranked_vector - min(ranked_vector, na.rm = TRUE)

# as.vector() flattened the matrix column by column and matrix() refills it
# the same way, so each rank lands back in its original cell.
expa_ranked <- matrix(ranked_vector, nrow = nrow(exps), ncol = ncol(exps))
expa_ranked <- as.data.frame(expa_ranked)

rownames(expa_ranked) <- rownames(expa)
colnames(expa_ranked) <- colnames(expa)

# Derive the survival columns used by the models and key the patient table by
# sample name.
# Setting row names on a tibble is deprecated, so the table is converted to a
# base data.frame before row names are assigned.
patients <- as.data.frame(patients)

# Follow-up time in months is converted to days using 30 days per month.
patients$overall_survival <- patients$Pat_Overall_Survival_Months * 30
# Any non-zero Pat_Died value counts as an event; NA stays NA.
patients$deceased <- patients$Pat_Died != 0

# NOTE(review): patients and samples are paired by row position, which
# assumes both sheets list them in the same order; confirm against the
# workbook.
stopifnot(length(sample$Sam_Name) == nrow(patients))
patients$sample <- sample$Sam_Name
rownames(patients) <- patients$sample

library(survival)
library(survminer)

## Loading required package: ggplot2

## Loading required package: ggpubr

## 
## Attaching package: 'survminer'

## The following object is masked from 'package:survival':
## 
##     myeloma

# Score every sample with the LUAD gene signature:
# risk score = sum over signature genes of (ranked expression * coefficient).
luad_final_genes <- readRDS("/Users/ozlemtuna/final_results/06-03-2025_luad_final_genes.rds")

selected <- rownames(luad_final_genes)
coef_values <- luad_final_genes$coef

# Indexing a data.frame with an unknown row name silently returns an all-NA
# row, which would turn every risk score into NA. Fail early and name the
# missing genes instead.
missing_genes <- setdiff(selected, rownames(expa_ranked))
if (length(missing_genes) > 0) {
  stop("Signature genes absent from the expression data: ",
       paste(missing_genes, collapse = ", "), call. = FALSE)
}

# After transposing: one row per sample, one column per signature gene, with
# columns in the same order as `coef_values`.
ranked <- expa_ranked[selected, , drop = FALSE]
ranked <- t(ranked)

# Base data.frame (not tibble) so that row names can be set without the
# deprecation warning.
selec <- c("overall_survival", "deceased")
patientss <- as.data.frame(patients[, selec])
rownames(patientss) <- patients$sample

# cbind() pairs rows purely by position. When the sample names allow it, put
# the expression rows in patient order first so a different ordering cannot
# attach one sample's expression to another patient's survival data.
if (!identical(rownames(patientss), rownames(ranked))) {
  if (all(rownames(patientss) %in% rownames(ranked))) {
    ranked <- ranked[rownames(patientss), , drop = FALSE]
  } else {
    warning("Patient sample names do not all match the expression column ",
            "names; rows are paired by position.", call. = FALSE)
  }
}

datas <- cbind(patientss, ranked)
gen_expression <- datas[, selected, drop = FALSE]

# A length mismatch would otherwise be hidden by vector recycling (or give a
# score of 0 for everyone if `coef` were missing).
stopifnot(is.numeric(coef_values),
          length(coef_values) == ncol(gen_expression))

# Matrix product = per-sample weighted sum of the signature genes.
risk_scores <- data.frame(
  risk_scores = as.vector(as.matrix(gen_expression) %*% coef_values),
  row.names = rownames(gen_expression)
)
datas$risk_scores <- risk_scores$risk_scores

# Split the samples into HIGH / LOW risk groups at the cutpoint returned by
# surv_cutpoint(), then compare the groups with Kaplan-Meier curves and a Cox
# proportional-hazards model.
# NOTE(review): the cutpoint is estimated on the same data that is then
# tested, which may make the group comparison optimistic; confirm this is
# intended for a validation cohort.
res.cut <- surv_cutpoint(
  datas,
  time = "overall_survival",
  event = "deceased",
  variables = "risk_scores"
)
cutpoint <- res.cut$cutpoint$cutpoint

# The numeric score column is overwritten by its group label; samples with a
# missing score stay NA.
is_high_risk <- datas$risk_scores > cutpoint
datas$risk_scores <- ifelse(is_high_risk, "HIGH", "LOW")

# Kaplan-Meier curves per risk group, with p-value and risk table.
fit_risk_scores <- survfit(
  Surv(overall_survival, deceased) ~ risk_scores,
  data = datas
)
ggsurvplot(fit_risk_scores, data = datas, pval = TRUE, risk.table = TRUE)

# Cox model on the risk group; printed at top level.
coxa <- coxph(
  Surv(overall_survival, deceased) ~ risk_scores,
  data = datas
)
coxa

## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ risk_scores, 
##     data = datas)
## 
##                   coef exp(coef) se(coef)      z     p
## risk_scoresLOW -0.2608    0.7705   0.2091 -1.247 0.212
## 
## Likelihood ratio test=1.5  on 1 df, p=0.2213
## n= 398, number of events= 113 
##    (44 observations deleted due to missingness)

# validation_1

# ozlemtuna

# 2025-02-28