library(survival)
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
sq <- readRDS("~/final_results/14_02_2025_deconvolution_sq.rds")
deconvolution_sq_scaled <- sq / rowSums(sq) * 100
lusc_clinic_for_bulk <- readRDS("~/final_results/lusc_clinic_for_bulk.rds")
data <- cbind(lusc_clinic_for_bulk, deconvolution_sq_scaled)

data$overall_survival <- as.numeric(data$overall_survival)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
cell_types <- c("Alveolar cell type 1", "Alveolar cell type 2", "B cell", "B cell dividing","cDC1", "cDC2", "Ciliated",  "Club", "DC mature", "Endothelial cell arterial",  "Endothelial cell lymphatic", "Endothelial cell venous",  "Fibroblast adventitial", "Fibroblast alveolar", "Fibroblast peribronchial","Macrophage", "Macrophage alveolar", "Mast cell", "Mesothelial", "Monocyte classical", "Monocyte non-classical", "Myeloid dividing","NK cell dividing", "pDC", "Pericyte",  "Plasma cell", "ROS1+ healthy epithelial","Smooth muscle cell", "Stromal dividing", "T cell CD4","T cell CD4 dividing", "T cell CD8 activated", "T cell CD8 dividing", "T cell CD8 naive", "T cell CD8 terminally exhausted", "T cell NK-like", "T cell regulatory", "Transitional Club/AT2")

p_values_df <- data.frame(Cell_Type = character(), P_Value = numeric(), stringsAsFactors = FALSE)

for (cell in cell_types) {
    res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", variables = c(cell))
    cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
  
    group_col <- gsub(" ", "_", cell)  
    data[[group_col]] <- ifelse(data[[cell]] > cutpoint, "HIGH", "LOW")
    
    fit <- survfit(Surv(overall_survival, deceased) ~ data[[group_col]], data = data)
    
    ggsurvplot(fit, data = data, pval = TRUE, risk.table = TRUE)
    
    surv_diff <- survdiff(Surv(overall_survival, deceased) ~ data[[group_col]], data = data)
    p_value <- 1 - pchisq(surv_diff$chisq, length(surv_diff$n) - 1)
    
    p_values_df <- rbind(p_values_df, data.frame(Cell_Type = cell, P_Value = p_value, stringsAsFactors = FALSE))
}
p_values_df$p.adj <- p.adjust(p_values_df$P_Value, method = "BH")
selected_cell_types <- p_values_df[p_values_df$p.adj < 0.05,]
library(reactable)
reactable(selected_cell_types)
library(survival)
library(survminer)
signature_sq_matrix <- readRDS("~/final_results/14_02_2025_signature_sq_matrix.rds")
sq <- readRDS("~/final_results/14_02_2025_deconvolution_sq.rds")
lusc_clinic_for_bulk <- readRDS("~/final_results/lusc_clinic_for_bulk.rds")
deconvolution_sq_scaled <- sq / rowSums(sq) * 100
data <- cbind(lusc_clinic_for_bulk, deconvolution_sq_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))

data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", 
                         variables = c("Macrophage_alveolar"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
data$Macrophage_alveolar <- ifelse(data$Macrophage_alveolar > cutpoint, "HIGH", "LOW")
fit_Macrophage_alveolar <- survfit(Surv(overall_survival, deceased) ~ Macrophage_alveolar, data = data)
ggsurvplot(fit_Macrophage_alveolar, 
           data, 
           pval = TRUE,   
           risk.table = TRUE)  

data <- cbind(lusc_clinic_for_bulk, deconvolution_sq_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))
colnames(data) <- gsub("-", "_", colnames(data))
coxa <- coxph(Surv(overall_survival, deceased) ~ Macrophage_alveolar, data = data) 
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ Macrophage_alveolar, 
##     data = data)
## 
##                        coef exp(coef) se(coef)     z      p
## Macrophage_alveolar 0.06940   1.07187  0.03033 2.288 0.0221
## 
## Likelihood ratio test=4.5  on 1 df, p=0.03398
## n= 144, number of events= 143 
##    (253 observations deleted due to missingness)
library(survival)
library(reactable)
signature_sq_matrix <- as.data.frame(signature_sq_matrix)
Macrophage_alveolar_exp <- signature_sq_matrix$`Macrophage alveolar`
other_cells_exp <- rowMeans(signature_sq_matrix[, colnames(signature_sq_matrix) != "Macrophage_alveolar"])
Macrophage_alveolar_lusc_filtered_genes <- signature_sq_matrix[(Macrophage_alveolar_exp - other_cells_exp) / other_cells_exp > 0.90, ]
sq_bulk <- readRDS("~/final_results/lusc_bulk.rds")
Macrophage_alveolar <- intersect(rownames(Macrophage_alveolar_lusc_filtered_genes), rownames(sq_bulk))
Macrophage_alveolar <- sq_bulk[Macrophage_alveolar,]
Macrophage_alveolar <- t(Macrophage_alveolar)
Macrophage_alveolar <- as.data.frame(Macrophage_alveolar)
lusc_BULK_CLINIC <- readRDS("~/final_results/lusc_clinic_for_bulk.rds")
lusc_BULK_CLINIC <- as.data.frame(lusc_BULK_CLINIC)
select_col <- c("overall_survival", "vital_status")
clinic <- lusc_BULK_CLINIC[,select_col]
data <- cbind(clinic, Macrophage_alveolar)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
library(survival)
exclude_cols <- c("overall_survival", "vital_status", "deceased")
gene_cols <- setdiff(colnames(data), exclude_cols)
cox_results <- list()
for (gene in gene_cols) {
  formula <- as.formula(paste("Surv(overall_survival, deceased) ~", gene))
  cox_model <- coxph(formula, data = data)
  cox_results[[gene]] <- summary(cox_model)
}

genes <- c()
coeffs <- c()
for (gene in names(cox_results)) {
  coef_value <- cox_results[[gene]]$coefficients[1, "coef"] 
  
  if (!is.na(coef_value) && coef_value > 0) {  
    genes <- c(genes, gene)
    coeffs <- c(coeffs, coef_value) 
  }
}

library(biomaRt)
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_annotation <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), 
                         filters = "ensembl_gene_id", 
                         values = genes, 
                         mart = ensembl)
ort <- intersect(genes, rownames(signature_sq_matrix))
sig <- signature_sq_matrix[ort, ]

Macrophage_alveolar_gene_df <- data.frame(
  ENSG_ID = genes,   
  Gene_Name = gene_annotation$hgnc_symbol,  
  Coefficient = coeffs,
  Cell_Type = "Macrophage_alveolar"
)

hazard_ratios <- sapply(genes, function(gene) {
  if (gene %in% names(cox_results)) {
    return(cox_results[[gene]]$coefficients[1, "exp(coef)"]) 
  } else {
    return(NA)  
  }
})

Macrophage_alveolar_gene_df$Hazard_Ratio <- hazard_ratios
Macrophage_alveolar_gene_df$exp.level <- sig$`Macrophage alveolar`

reactable(Macrophage_alveolar_gene_df)
library(survival)
library(survminer)
signature_sq_matrix <- readRDS("~/final_results/14_02_2025_signature_sq_matrix.rds")
sq <- readRDS("~/final_results/14_02_2025_deconvolution_sq.rds")
lusc_clinic_for_bulk <- readRDS("~/final_results/lusc_clinic_for_bulk.rds")
deconvolution_sq_scaled <- sq / rowSums(sq) * 100
data <- cbind(lusc_clinic_for_bulk, deconvolution_sq_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))

data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", 
                         variables = c("Monocyte_classical"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
data$Monocyte_classical <- ifelse(data$Monocyte_classical > cutpoint, "HIGH", "LOW")
fit_Monocyte_classical <- survfit(Surv(overall_survival, deceased) ~ Monocyte_classical, data = data)
ggsurvplot(fit_Monocyte_classical, 
           data, 
           pval = TRUE,   
           risk.table = TRUE)  

data <- cbind(lusc_clinic_for_bulk, deconvolution_sq_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))
colnames(data) <- gsub("-", "_", colnames(data))

coxa <- coxph(Surv(overall_survival, deceased) ~ Monocyte_classical, data = data) 
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ Monocyte_classical, 
##     data = data)
## 
##                      coef exp(coef) se(coef)     z     p
## Monocyte_classical 0.2020    1.2238   0.1278 1.581 0.114
## 
## Likelihood ratio test=2.27  on 1 df, p=0.1321
## n= 144, number of events= 143 
##    (253 observations deleted due to missingness)
library(survival)
library(reactable)

signature_sq_matrix <- as.data.frame(signature_sq_matrix)
Monocyte_classical_exp <- signature_sq_matrix$`Monocyte classical`
other_cells_exp <- rowMeans(signature_sq_matrix[, colnames(signature_sq_matrix) != "Monocyte_classical"])
Monocyte_classical_lusc_filtered_genes <- signature_sq_matrix[(Monocyte_classical_exp - other_cells_exp) / other_cells_exp > 0.90, ]
sq_bulk <- readRDS("~/final_results/lusc_bulk.rds")
Monocyte_classical <- intersect(rownames(Monocyte_classical_lusc_filtered_genes), rownames(sq_bulk))
Monocyte_classical <- sq_bulk[Monocyte_classical,]
Monocyte_classical <- t(Monocyte_classical)
Monocyte_classical <- as.data.frame(Monocyte_classical)
lusc_BULK_CLINIC <- readRDS("~/final_results/lusc_clinic_for_bulk.rds")
lusc_BULK_CLINIC <- as.data.frame(lusc_BULK_CLINIC)
select_col <- c("overall_survival", "vital_status")
clinic <- lusc_BULK_CLINIC[,select_col]
data <- cbind(clinic, Monocyte_classical)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
library(survival)
exclude_cols <- c("overall_survival", "vital_status", "deceased")
gene_cols <- setdiff(colnames(data), exclude_cols)
cox_results <- list()
for (gene in gene_cols) {
  formula <- as.formula(paste("Surv(overall_survival, deceased) ~", gene))
  cox_model <- coxph(formula, data = data)
  cox_results[[gene]] <- summary(cox_model)
}

genes <- c()
coeffs <- c()
for (gene in names(cox_results)) {
  coef_value <- cox_results[[gene]]$coefficients[1, "coef"] 
  
  if (!is.na(coef_value) && coef_value > 0) {  
    genes <- c(genes, gene)
    coeffs <- c(coeffs, coef_value) 
  }
}

library(biomaRt)
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_annotation <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), 
                         filters = "ensembl_gene_id", 
                         values = genes, 
                         mart = ensembl)
ort <- intersect(genes, rownames(signature_sq_matrix))
sig <- signature_sq_matrix[ort, ]

Monocyte_classical_gene_df <- data.frame(
  ENSG_ID = genes,   
  Gene_Name = gene_annotation$hgnc_symbol,  
  Coefficient = coeffs,
  Cell_Type = "Monocyte_classical"
)
hazard_ratios <- sapply(genes, function(gene) {
  if (gene %in% names(cox_results)) {
    return(cox_results[[gene]]$coefficients[1, "exp(coef)"]) 
  } else {
    return(NA)  
  }
})

Monocyte_classical_gene_df$Hazard_Ratio <- hazard_ratios
Monocyte_classical_gene_df$exp.level <- sig$`Monocyte classical`

reactable(Monocyte_classical_gene_df)
library(survival)
library(survminer)
signature_sq_matrix <- readRDS("~/final_results/14_02_2025_signature_sq_matrix.rds")
sq <- readRDS("~/final_results/14_02_2025_deconvolution_sq.rds")
lusc_clinic_for_bulk <- readRDS("~/final_results/lusc_clinic_for_bulk.rds")
deconvolution_sq_scaled <- sq / rowSums(sq) * 100
data <- cbind(lusc_clinic_for_bulk, deconvolution_sq_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))

data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", 
                         variables = c("T_cell_CD8_naive"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
data$T_cell_CD8_naive <- ifelse(data$T_cell_CD8_naive > cutpoint, "HIGH", "LOW")
fit_T_cell_CD8_naive <- survfit(Surv(overall_survival, deceased) ~ T_cell_CD8_naive, data = data)
ggsurvplot(fit_T_cell_CD8_naive, 
           data, 
           pval = TRUE,   
           risk.table = TRUE)  

data <- cbind(lusc_clinic_for_bulk, deconvolution_sq_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))
colnames(data) <- gsub("-", "_", colnames(data))

coxa <- coxph(Surv(overall_survival, deceased) ~ T_cell_CD8_naive, data = data) 
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ T_cell_CD8_naive, 
##     data = data)
## 
##                     coef exp(coef) se(coef)     z      p
## T_cell_CD8_naive 0.12939   1.13813  0.06223 2.079 0.0376
## 
## Likelihood ratio test=4.13  on 1 df, p=0.04213
## n= 144, number of events= 143 
##    (253 observations deleted due to missingness)
library(survival)
library(reactable)

signature_sq_matrix <- as.data.frame(signature_sq_matrix)
T_cell_CD8_naive_exp <- signature_sq_matrix$`T cell CD8 naive`
other_cells_exp <- rowMeans(signature_sq_matrix[, colnames(signature_sq_matrix) != "T_cell_CD8_naive"])
T_cell_CD8_naive_lusc_filtered_genes <- signature_sq_matrix[(T_cell_CD8_naive_exp - other_cells_exp) / other_cells_exp > 0.90, ]
sq_bulk <- readRDS("~/final_results/lusc_bulk.rds")
T_cell_CD8_naive <- intersect(rownames(T_cell_CD8_naive_lusc_filtered_genes), rownames(sq_bulk))
T_cell_CD8_naive <- sq_bulk[T_cell_CD8_naive,]
T_cell_CD8_naive <- t(T_cell_CD8_naive)
T_cell_CD8_naive <- as.data.frame(T_cell_CD8_naive)
lusc_BULK_CLINIC <- readRDS("~/final_results/lusc_clinic_for_bulk.rds")
lusc_BULK_CLINIC <- as.data.frame(lusc_BULK_CLINIC)
select_col <- c("overall_survival", "vital_status")
clinic <- lusc_BULK_CLINIC[,select_col]
data <- cbind(clinic, T_cell_CD8_naive)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
library(survival)
exclude_cols <- c("overall_survival", "vital_status", "deceased")
gene_cols <- setdiff(colnames(data), exclude_cols)
cox_results <- list()
for (gene in gene_cols) {
  formula <- as.formula(paste("Surv(overall_survival, deceased) ~", gene))
  cox_model <- coxph(formula, data = data)
  cox_results[[gene]] <- summary(cox_model)
}

genes <- c()
coeffs <- c()
for (gene in names(cox_results)) {
  coef_value <- cox_results[[gene]]$coefficients[1, "coef"] 
  
  if (!is.na(coef_value) && coef_value > 0) {  
    genes <- c(genes, gene)
    coeffs <- c(coeffs, coef_value) 
  }
}

library(biomaRt)
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_annotation <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), 
                         filters = "ensembl_gene_id", 
                         values = genes, 
                         mart = ensembl)
ort <- intersect(genes, rownames(signature_sq_matrix))
sig <- signature_sq_matrix[ort, ]

T_cell_CD8_naive_gene_df <- data.frame(
  ENSG_ID = genes,   
  Gene_Name = gene_annotation$hgnc_symbol,  
  Coefficient = coeffs,
  Cell_Type = "T_cell_CD8_naive"
)
hazard_ratios <- sapply(genes, function(gene) {
  if (gene %in% names(cox_results)) {
    return(cox_results[[gene]]$coefficients[1, "exp(coef)"]) 
  } else {
    return(NA)  
  }
})

T_cell_CD8_naive_gene_df$Hazard_Ratio <- hazard_ratios
T_cell_CD8_naive_gene_df$exp.level <- sig$`T cell CD8 naive`

reactable(T_cell_CD8_naive_gene_df)
library(survival)
library(survminer)
signature_sq_matrix <- readRDS("~/final_results/14_02_2025_signature_sq_matrix.rds")
sq <- readRDS("~/final_results/14_02_2025_deconvolution_sq.rds")
lusc_clinic_for_bulk <- readRDS("~/final_results/lusc_clinic_for_bulk.rds")
deconvolution_sq_scaled <- sq / rowSums(sq) * 100
data <- cbind(lusc_clinic_for_bulk, deconvolution_sq_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))

data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", 
                         variables = c("Endothelial_cell_venous"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
data$Endothelial_cell_venous <- ifelse(data$Endothelial_cell_venous > cutpoint, "HIGH", "LOW")
fit_Endothelial_cell_venous <- survfit(Surv(overall_survival, deceased) ~ Endothelial_cell_venous, data = data)
ggsurvplot(fit_Endothelial_cell_venous, 
           data, 
           pval = TRUE,   
           risk.table = TRUE)  

data <- cbind(lusc_clinic_for_bulk, deconvolution_sq_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))
colnames(data) <- gsub("-", "_", colnames(data))

coxa <- coxph(Surv(overall_survival, deceased) ~ Endothelial_cell_venous, data = data) 
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ Endothelial_cell_venous, 
##     data = data)
## 
##                            coef exp(coef) se(coef)     z     p
## Endothelial_cell_venous 0.10096   1.10624  0.06224 1.622 0.105
## 
## Likelihood ratio test=2.47  on 1 df, p=0.1163
## n= 144, number of events= 143 
##    (253 observations deleted due to missingness)
library(survival)
library(reactable)

signature_sq_matrix <- as.data.frame(signature_sq_matrix)
Endothelial_cell_venous_exp <- signature_sq_matrix$`Endothelial cell venous`
other_cells_exp <- rowMeans(signature_sq_matrix[, colnames(signature_sq_matrix) != "Endothelial_cell_venous"])
Endothelial_cell_venous_lusc_filtered_genes <- signature_sq_matrix[(Endothelial_cell_venous_exp - other_cells_exp) / other_cells_exp > 0.90, ]
sq_bulk <- readRDS("~/final_results/lusc_bulk.rds")
Endothelial_cell_venous <- intersect(rownames(Endothelial_cell_venous_lusc_filtered_genes), rownames(sq_bulk))
Endothelial_cell_venous <- sq_bulk[Endothelial_cell_venous,]
Endothelial_cell_venous <- t(Endothelial_cell_venous)
Endothelial_cell_venous <- as.data.frame(Endothelial_cell_venous)
lusc_BULK_CLINIC <- readRDS("~/final_results/lusc_clinic_for_bulk.rds")
lusc_BULK_CLINIC <- as.data.frame(lusc_BULK_CLINIC)
select_col <- c("overall_survival", "vital_status")
clinic <- lusc_BULK_CLINIC[,select_col]
data <- cbind(clinic, Endothelial_cell_venous)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
library(survival)
exclude_cols <- c("overall_survival", "vital_status", "deceased")
gene_cols <- setdiff(colnames(data), exclude_cols)
cox_results <- list()
for (gene in gene_cols) {
  formula <- as.formula(paste("Surv(overall_survival, deceased) ~", gene))
  cox_model <- coxph(formula, data = data)
  cox_results[[gene]] <- summary(cox_model)
}

genes <- c()
coeffs <- c()
for (gene in names(cox_results)) {
  coef_value <- cox_results[[gene]]$coefficients[1, "coef"] 
  
  if (!is.na(coef_value) && coef_value > 0) {  
    genes <- c(genes, gene)
    coeffs <- c(coeffs, coef_value) 
  }
}

library(biomaRt)
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_annotation <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), 
                         filters = "ensembl_gene_id", 
                         values = genes, 
                         mart = ensembl)
ort <- intersect(genes, rownames(signature_sq_matrix))
sig <- signature_sq_matrix[ort, ]

Endothelial_cell_venous_gene_df <- data.frame(
  ENSG_ID = genes,   
  Gene_Name = gene_annotation$hgnc_symbol,  
  Coefficient = coeffs,
  Cell_Type = "Endothelial_cell_venous"
)
hazard_ratios <- sapply(genes, function(gene) {
  if (gene %in% names(cox_results)) {
    return(cox_results[[gene]]$coefficients[1, "exp(coef)"]) 
  } else {
    return(NA)  
  }
})

Endothelial_cell_venous_gene_df$Hazard_Ratio <- hazard_ratios
Endothelial_cell_venous_gene_df$exp.level <- sig$`Endothelial cell venous`

reactable(Endothelial_cell_venous_gene_df)
library(survival)
library(survminer)
signature_sq_matrix <- readRDS("~/final_results/14_02_2025_signature_sq_matrix.rds")
sq <- readRDS("~/final_results/14_02_2025_deconvolution_sq.rds")
lusc_clinic_for_bulk <- readRDS("~/final_results/lusc_clinic_for_bulk.rds")
deconvolution_sq_scaled <- sq / rowSums(sq) * 100
data <- cbind(lusc_clinic_for_bulk, deconvolution_sq_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))

data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", 
                         variables = c("Fibroblast_alveolar"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
data$Fibroblast_alveolar <- ifelse(data$Fibroblast_alveolar > cutpoint, "HIGH", "LOW")
fit_Fibroblast_alveolar <- survfit(Surv(overall_survival, deceased) ~ Fibroblast_alveolar, data = data)
ggsurvplot(fit_Fibroblast_alveolar, 
           data, 
           pval = TRUE,   
           risk.table = TRUE)  

data <- cbind(lusc_clinic_for_bulk, deconvolution_sq_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))
colnames(data) <- gsub("-", "_", colnames(data))

coxa <- coxph(Surv(overall_survival, deceased) ~ Fibroblast_alveolar, data = data) 
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ Fibroblast_alveolar, 
##     data = data)
## 
##                        coef exp(coef) se(coef)     z     p
## Fibroblast_alveolar 0.08755   1.09150  0.05749 1.523 0.128
## 
## Likelihood ratio test=2.13  on 1 df, p=0.1448
## n= 144, number of events= 143 
##    (253 observations deleted due to missingness)
library(survival)
library(reactable)

signature_sq_matrix <- as.data.frame(signature_sq_matrix)
Fibroblast_alveolar_exp <- signature_sq_matrix$`Fibroblast alveolar`
other_cells_exp <- rowMeans(signature_sq_matrix[, colnames(signature_sq_matrix) != "Fibroblast_alveolar"])
Fibroblast_alveolar_lusc_filtered_genes <- signature_sq_matrix[(Fibroblast_alveolar_exp - other_cells_exp) / other_cells_exp > 0.90, ]
sq_bulk <- readRDS("~/final_results/lusc_bulk.rds")
Fibroblast_alveolar <- intersect(rownames(Fibroblast_alveolar_lusc_filtered_genes), rownames(sq_bulk))
Fibroblast_alveolar <- sq_bulk[Fibroblast_alveolar,]
Fibroblast_alveolar <- t(Fibroblast_alveolar)
Fibroblast_alveolar <- as.data.frame(Fibroblast_alveolar)
lusc_BULK_CLINIC <- readRDS("~/final_results/lusc_clinic_for_bulk.rds")
lusc_BULK_CLINIC <- as.data.frame(lusc_BULK_CLINIC)
select_col <- c("overall_survival", "vital_status")
clinic <- lusc_BULK_CLINIC[,select_col]
data <- cbind(clinic, Fibroblast_alveolar)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
library(survival)
exclude_cols <- c("overall_survival", "vital_status", "deceased")
gene_cols <- setdiff(colnames(data), exclude_cols)
cox_results <- list()
for (gene in gene_cols) {
  formula <- as.formula(paste("Surv(overall_survival, deceased) ~", gene))
  cox_model <- coxph(formula, data = data)
  cox_results[[gene]] <- summary(cox_model)
}

genes <- c()
coeffs <- c()
for (gene in names(cox_results)) {
  coef_value <- cox_results[[gene]]$coefficients[1, "coef"] 
  
  if (!is.na(coef_value) && coef_value > 0) {  
    genes <- c(genes, gene)
    coeffs <- c(coeffs, coef_value) 
  }
}

library(biomaRt)
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_annotation <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), 
                         filters = "ensembl_gene_id", 
                         values = genes, 
                         mart = ensembl)
ort <- intersect(genes, rownames(signature_sq_matrix))
sig <- signature_sq_matrix[ort, ]

Fibroblast_alveolar_gene_df <- data.frame(
  ENSG_ID = genes,   
  Gene_Name = gene_annotation$hgnc_symbol,  
  Coefficient = coeffs,
  Cell_Type = "Fibroblast_alveolar"
)
hazard_ratios <- sapply(genes, function(gene) {
  if (gene %in% names(cox_results)) {
    return(cox_results[[gene]]$coefficients[1, "exp(coef)"]) 
  } else {
    return(NA)  
  }
})

Fibroblast_alveolar_gene_df$Hazard_Ratio <- hazard_ratios
Fibroblast_alveolar_gene_df$exp.level <- sig$`Fibroblast alveolar`

reactable(Fibroblast_alveolar_gene_df)
library(survival)
library(survminer)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:biomaRt':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
df_list <- list(Endothelial_cell_venous_gene_df,Fibroblast_alveolar_gene_df, Macrophage_alveolar_gene_df,Monocyte_classical_gene_df, T_cell_CD8_naive_gene_df)

df_names <- c("Endothelial_cell_venous_gene_df","Fibroblast_alveolar_gene_df", "Macrophage_alveolar_gene_df","Monocyte_classical_gene_df", "T_cell_CD8_naive_gene_df")
library(dplyr)


final_genes <- unique(unlist(lapply(df_list, function(df) {
  if (!is.null(df)) {
    top5_genes <- df %>%
      arrange(desc(exp.level)) %>%
      head(5)
    
  }
})))

filtered_df_list <- lapply(df_list, function(df) {
  if (!is.null(df)) {
    df %>% filter(ENSG_ID %in% final_genes)
  } else {
    return(NULL)
  }
})

final <- unique(unlist(lapply(filtered_df_list, function(df) {
  if (!is.null(df)) {
    top3_genes <- df %>%
      arrange(desc(Hazard_Ratio)) %>%
      head(3)
    
  }
})))




sq <- readRDS("~/final_results/lusc_bulk.rds") |> as.data.frame() |> t() |> as.data.frame()
lusc_clinic_for_bulk <- readRDS("~/final_results/lusc_clinic_for_bulk.rds")
data <- cbind(lusc_clinic_for_bulk, sq)
data$deceased <- ifelse(data$vital_status == "Alive", 0, 1)
ensembl_ids <- final


mart <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_map <- getBM(
  attributes = c("ensembl_gene_id", "external_gene_name"),
  filters = "ensembl_gene_id",
  values = ensembl_ids,
  mart = mart
)

colnames(data)[colnames(data) %in% gene_map$ensembl_gene_id] <- gene_map$external_gene_name
data <- cbind(lusc_clinic_for_bulk, data[, gene_map$external_gene_name])
data$overall_survival <- as.numeric(data$overall_survival)
univ_cox <- function(variable) {
  formula <- reformulate(variable, response = "Surv(overall_survival, deceased)")
  cox_model <- coxph(formula, data = data)
  cox_summary <- summary(cox_model)
  
  c(
    coef = cox_summary$coefficients[1],  
    HR = exp(cox_summary$coefficients[1]), 
    p_value = cox_summary$coefficients[5], 
    CI_lower = exp(cox_summary$conf.int[ ,3]), 
    CI_upper = exp(cox_summary$conf.int[ ,4])
  )
}
colnames(data) <- gsub("-", "_", colnames(data))

univ_results <- lapply(colnames(data[, 8:ncol(data)]), univ_cox) 
univ_results_df <- do.call(rbind, univ_results) |> as.data.frame()
rownames(univ_results_df) <- colnames(data[, 8:ncol(data)])
gen_expression <- data[, 8:ncol(data)]
coef_values <- univ_results_df$coef
risk_scores <- apply(gen_expression, 1, function(x) sum(x * coef_values))
risk_scores <- as.data.frame(risk_scores)
data$risk_scores <- risk_scores$risk_scores
res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", 
                         variables = c("risk_scores"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
lusc_final_genes <- readRDS("~/final_results/lusc_final_genes.rds")
lusc_final_genes <- lusc_final_genes[rownames(univ_results_df),]
univ_results_df$cell_type <- lusc_final_genes$cell_type

data$risk_scores <- ifelse(data$risk_scores > cutpoint, "HIGH", "LOW")
fit_risk_scores <- survfit(Surv(overall_survival, deceased) ~ risk_scores, data = data)
ggsurvplot(fit_risk_scores, data, pval = TRUE, risk.table = TRUE)

coxa <- coxph(Surv(overall_survival, deceased) ~ risk_scores, data = data)
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ risk_scores, 
##     data = data)
## 
##                   coef exp(coef) se(coef)      z        p
## risk_scoresLOW -0.9577    0.3838   0.1901 -5.037 4.72e-07
## 
## Likelihood ratio test=24.57  on 1 df, p=7.179e-07
## n= 144, number of events= 143 
##    (253 observations deleted due to missingness)
saveRDS(univ_results_df, "06-03-2025_lusc_final_genes.rds")
reactable(univ_results_df)