#library(GEOquery)
#gse <- getGEO("GSE73403", GSEMatrix = TRUE)
#eset <- gse[[1]]
#exp <- exprs(eset)
#saveRDS(exp, "~/final_results/geo_lusc_exp.rds")
exp <- readRDS("~/final_results/geo_lusc_exp.rds")
#metadata <- gse[["GSE73403_series_matrix.txt.gz"]]@phenoData@data
#saveRDS(metadata, "~/final_results/geo_lusc_metadata.rds")
metadata <- readRDS("~/final_results/geo_lusc_metadata.rds")
metadata$`survival (year):ch1` <- as.numeric(metadata$`survival (year):ch1`)
metadata$overall_survival <- metadata$`survival (year):ch1`*365
metadata$vital_status <- metadata$`events (death = 1, alive = 0):ch1`
metadata$deceased <- ifelse(metadata$vital_status == 0, FALSE, TRUE)
selected <- c("overall_survival","deceased")
meta <- metadata[,selected]
#a <- eset@featureData@data 
#saveRDS(a, "~/final_results/geo_lusc_feature.rds")
a <- readRDS("~/final_results/geo_lusc_feature.rds")
exp_df <- as.data.frame(exp)
exp_df$gene <- a$GENE_SYMBOL
exp_df_clean <- exp_df[!is.na(exp_df$gene) & exp_df$gene != "", ]
exp_df_clean$gene <- make.unique(as.character(exp_df_clean$gene))
rownames(exp_df_clean) <- exp_df_clean$gene
exp_df_clean <- exp_df_clean[,-70]
exps <- exp_df_clean

exps <- as.matrix(do.call(cbind, exps))
ranked_vector <- rank(as.vector(exps), ties.method = "min")
ranked_vector <- ranked_vector - min(ranked_vector)
expa_ranked <- matrix(ranked_vector, nrow = nrow(exps), ncol = ncol(exps))

expa_ranked <- as.data.frame(expa_ranked)

rownames(expa_ranked) <- rownames(exp_df_clean)
colnames(expa_ranked) <- colnames(exp_df_clean)
library(survival)
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
lusc_final_genes <- readRDS("/Users/ozlemtuna/final_results/lusc_final_genes.rds")

selected <- rownames(lusc_final_genes)
ranked <- expa_ranked[selected,]
ranked <- t(ranked)
ranked_cleaned <- ranked[, !colnames(ranked) %in% c("NA", "NA.1", "NA.2","NA.3")]

datas <- cbind(meta, ranked_cleaned)

lusc_final_genes <- lusc_final_genes[colnames(ranked_cleaned),]

coef_values <- lusc_final_genes$coef
gen_expression <- datas[, 3:ncol(datas)]
risk_scores <- apply(gen_expression, 1, function(x) sum(x * coef_values))
risk_scores <- as.data.frame(risk_scores)
datas$risk_scores <- risk_scores$risk_scores

res.cut <- surv_cutpoint(datas, time = "overall_survival", event = "deceased", variables = c("risk_scores"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
datas$risk_scores <- ifelse(datas$risk_scores > cutpoint, "HIGH", "LOW")

fit_risk_scores <- survfit(Surv(overall_survival, deceased) ~ risk_scores, data = datas)
ggsurvplot(fit_risk_scores, datas, pval = TRUE, risk.table = TRUE)

coxa <- coxph(Surv(overall_survival, deceased) ~ risk_scores, data = datas)
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ risk_scores, 
##     data = datas)
## 
##                   coef exp(coef) se(coef)      z      p
## risk_scoresLOW -1.2066    0.2992   0.5408 -2.231 0.0257
## 
## Likelihood ratio test=6.48  on 1 df, p=0.01089
## n= 69, number of events= 28