library(readxl)
# Enumerate the worksheets contained in the workbook.
sheet_names <- excel_sheets("/Users/ozlemtuna/Downloads/48_Schabath_2016-2.xlsx")
# Read every worksheet and key the resulting list by worksheet name.
data_list <- setNames(
  lapply(
    sheet_names,
    function(sheet_name) {
      read_excel(
        "/Users/ozlemtuna/Downloads/48_Schabath_2016-2.xlsx",
        sheet = sheet_name
      )
    }
  ),
  sheet_names
)
# Pull the three worksheets out of the list by position.
# NOTE(review): this assumes the workbook's sheet order is patients, samples,
# expression -- confirm against the file. `sample` and `exp` also reuse the
# names of base R functions, which is easy to misread further down.
patients <- data_list[[1]]
sample <- data_list[[2]]
exp <- as.data.frame(data_list[[3]])
# Without this check a missing ID column would silently reset the row names
# instead of failing.
stopifnot("EntrezID" %in% names(exp))
# Key the expression rows by Entrez gene ID.
rownames(exp) <- exp$EntrezID
# Drop the ID column by name rather than by position, and stay a data frame
# even when only one other column is left.
exp <- exp[, setdiff(names(exp), "EntrezID"), drop = FALSE]
library(biomaRt)
# Connect to the Ensembl human gene dataset through biomaRt.
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
# The expression rows are keyed by Entrez ID; query the HGNC symbol for each.
entrez_ids <- rownames(exp)
genes <- getBM(
  attributes = c("entrezgene_id", "hgnc_symbol"),
  filters = "entrezgene_id",
  values = entrez_ids,
  mart = mart
)
names(genes) <- c("EntrezID", "GeneSymbol")
# match() returns the first annotation row per ID, so a gene with several
# returned symbols gets the first one; IDs without a hit become NA.
exp$GeneSymbol <- genes$GeneSymbol[match(entrez_ids, genes$EntrezID)]
# Keep only the genes that received a non-empty symbol.
expa <- exp[!is.na(exp$GeneSymbol) & nzchar(exp$GeneSymbol), ]
# Re-key the rows by symbol. make.names() rewrites symbols into syntactic R
# names and appends suffixes to repeated symbols so the row names are unique.
rownames(expa) <- make.names(expa$GeneSymbol, unique = TRUE)
expa[["GeneSymbol"]] <- NULL
# Rank-transform the expression values.
# NOTE(review): the ranks are computed over the pooled values of the whole
# matrix (all genes and all samples together), not within each sample --
# confirm this is the intended normalisation.
exps <- expa
exps <- as.matrix(do.call(cbind, exps))
ranked_vector <- rank(as.vector(exps), ties.method = "min")
# rank() keeps NA for missing inputs, so the minimum has to ignore them;
# otherwise a single missing expression value would turn every rank into NA.
ranked_vector <- ranked_vector - min(ranked_vector, na.rm = TRUE)
# Fold the zero-based ranks back into the original genes x samples layout.
expa_ranked <- as.data.frame(
  matrix(ranked_vector, nrow = nrow(exps), ncol = ncol(exps))
)
rownames(expa_ranked) <- rownames(expa)
colnames(expa_ranked) <- colnames(expa)
# Work on a plain data frame: setting row names on a tibble is deprecated and
# only succeeds with a warning.
patients <- as.data.frame(patients)
# Survival time: the months column scaled by 30 (approximate days, presumably;
# the unit is taken from the column name).
patients$overall_survival <- patients$Pat_Overall_Survival_Months * 30
# Event indicator: TRUE unless Pat_Died is 0; NA stays NA.
patients$deceased <- patients$Pat_Died != 0
# Sample names are attached by row position, so the two sheets must at least
# have the same number of rows. A plain data frame would otherwise recycle a
# shorter vector silently.
# NOTE(review): this also assumes both sheets list patients in the same
# order -- confirm against the workbook.
stopifnot(length(sample$Sam_Name) == nrow(patients))
patients$sample <- sample$Sam_Name
rownames(patients) <- patients$sample
library(survival)
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
# Load the gene table; its row names select the genes used below.
luad_final_genes <- readRDS("/Users/ozlemtuna/final_results/06-03-2025_luad_final_genes.rds")
selected <- rownames(luad_final_genes)
# Indexing a data frame by an unknown row name yields an all-NA row instead of
# an error, which would silently turn the downstream scores into NA.
# Note that the row names of expa_ranked went through make.names(), so the
# selected names must use the same sanitised spelling.
missing_genes <- setdiff(selected, rownames(expa_ranked))
if (length(missing_genes) > 0) {
  stop(
    "Selected genes not found in expa_ranked: ",
    paste(missing_genes, collapse = ", "),
    call. = FALSE
  )
}
# Transpose so that samples are rows and the selected genes are columns.
ranked <- t(expa_ranked[selected, , drop = FALSE])
selec <- c("overall_survival", "deceased")
# Plain data frame, so row names can be set without the tibble deprecation
# warning.
patientss <- as.data.frame(patients[, selec])
rownames(patientss) <- patients$sample
# cbind() pairs rows purely by position. Put the expression rows into the
# sample order of the clinical table whenever the sample names allow it.
if (all(patients$sample %in% rownames(ranked))) {
  ranked <- ranked[match(patients$sample, rownames(ranked)), , drop = FALSE]
} else {
  warning(
    "Not every patients$sample matches an expression column name; ",
    "pairing clinical and expression rows by position.",
    call. = FALSE
  )
}
datas <- cbind(patientss, ranked)
# Per-gene weights, in the row order of luad_final_genes.
coef_values <- luad_final_genes$coef
# The gene columns follow the two survival columns at the front of `datas`.
# Guard the column range (3:ncol would count backwards for fewer than three
# columns) and keep a data frame even when there is a single gene column.
stopifnot(ncol(datas) >= 3)
gen_expression <- datas[, seq.int(3, ncol(datas)), drop = FALSE]
# A missing `coef` column (NULL) or a length mismatch would otherwise be
# recycled silently inside the weighted sum.
stopifnot(
  is.numeric(coef_values),
  length(coef_values) == ncol(gen_expression)
)
# Weighted sum of each sample's gene values, computed as one matrix product
# instead of a row-wise apply() over a data frame.
risk_scores <- drop(as.matrix(gen_expression) %*% coef_values)
risk_scores <- as.data.frame(risk_scores)
datas$risk_scores <- risk_scores$risk_scores
# Let survminer choose the risk-score threshold for splitting the cohort.
# NOTE(review): the threshold is tuned on the same data that is tested below,
# so the reported p-value is likely optimistic -- confirm how it is reported.
res.cut <- surv_cutpoint(
  data = datas,
  time = "overall_survival",
  event = "deceased",
  variables = "risk_scores"
)
cutpoint <- res.cut$cutpoint$cutpoint
# Overwrite the numeric score with its group label: scores above the
# threshold are "HIGH", everything else "LOW"; NA stays NA.
datas$risk_scores <- ifelse(datas$risk_scores <= cutpoint, "LOW", "HIGH")
# Kaplan-Meier curves per risk group, drawn with a p-value and a risk table.
fit_risk_scores <- survfit(
  formula = Surv(overall_survival, deceased) ~ risk_scores,
  data = datas
)
ggsurvplot(
  fit_risk_scores,
  data = datas,
  pval = TRUE,
  risk.table = TRUE
)

# Cox proportional-hazards fit of survival on the HIGH/LOW risk group.
# The bare object name on the last line prints the fitted model.
coxa <- coxph(
  formula = Surv(overall_survival, deceased) ~ risk_scores,
  data = datas
)
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ risk_scores,
## data = datas)
##
## coef exp(coef) se(coef) z p
## risk_scoresLOW -0.2608 0.7705 0.2091 -1.247 0.212
##
## Likelihood ratio test=1.5 on 1 df, p=0.2213
## n= 398, number of events= 113
## (44 observations deleted due to missingness)