library(survival)
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
adeno <- readRDS("~/final_results/14_02_2025_deconvolution_adeno.rds")
deconvolution_adeno_scaled <- adeno / rowSums(adeno) * 100
luad_clinic_for_bulk <- readRDS("~/final_results/luad_clinic_for_bulk.rds")
data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)

data$overall_survival <- as.numeric(data$overall_survival)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
cell_types <- c("Alveolar cell type 1","Alveolar cell type 2","B cell", "B cell dividing","cDC1", "Ciliated",  "Club", "DC mature", "Endothelial cell arterial", "Endothelial cell capillary", "Endothelial cell lymphatic", "Endothelial cell venous",  "Fibroblast alveolar", "Fibroblast peribronchial", 
                "Macrophage", "Macrophage alveolar", "Mast cell", 
                "Mesothelial", "Monocyte non-classical", 
                "Myeloid dividing", "Neutrophils",
                "NK cell dividing", "pDC", "Pericyte", 
                "Plasma cell", "Plasma cell dividing","ROS1+ healthy epithelial", 
                "Smooth muscle cell", "T cell CD4", 
                "T cell CD4 dividing", "T cell CD8 activated", "T cell CD8 dividing", "T cell CD8 naive", "T cell CD8 terminally exhausted", 
                "T cell NK-like", "T cell regulatory", "Transitional Club/AT2")

p_values_df <- data.frame(Cell_Type = character(), P_Value = numeric(), stringsAsFactors = FALSE)

for (cell in cell_types) {
    res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", variables = c(cell))
    cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
  
    group_col <- gsub(" ", "_", cell)  
    data[[group_col]] <- ifelse(data[[cell]] > cutpoint, "HIGH", "LOW")
    
    fit <- survfit(Surv(overall_survival, deceased) ~ data[[group_col]], data = data)
    
    ggsurvplot(fit, data = data, pval = TRUE, risk.table = TRUE)
    
    surv_diff <- survdiff(Surv(overall_survival, deceased) ~ data[[group_col]], data = data)
    p_value <- 1 - pchisq(surv_diff$chisq, length(surv_diff$n) - 1)
    
    p_values_df <- rbind(p_values_df, data.frame(Cell_Type = cell, P_Value = p_value, stringsAsFactors = FALSE))
}

print(p_values_df)
##                          Cell_Type      P_Value
## 1             Alveolar cell type 1 4.435617e-04
## 2             Alveolar cell type 2 1.738826e-03
## 3                           B cell 5.624679e-02
## 4                  B cell dividing 4.341737e-01
## 5                             cDC1 1.604888e-01
## 6                         Ciliated 2.987807e-01
## 7                             Club 2.696280e-02
## 8                        DC mature 1.095731e-01
## 9        Endothelial cell arterial 1.082150e-02
## 10      Endothelial cell capillary 1.486327e-01
## 11      Endothelial cell lymphatic 2.820878e-01
## 12         Endothelial cell venous 2.207997e-01
## 13             Fibroblast alveolar 3.241061e-02
## 14        Fibroblast peribronchial 2.800372e-01
## 15                      Macrophage 1.031021e-06
## 16             Macrophage alveolar 1.665885e-02
## 17                       Mast cell 2.962287e-02
## 18                     Mesothelial 1.955151e-02
## 19          Monocyte non-classical 7.372882e-02
## 20                Myeloid dividing 7.342144e-02
## 21                     Neutrophils 6.778062e-04
## 22                NK cell dividing 8.748413e-02
## 23                             pDC 9.412299e-02
## 24                        Pericyte 7.036920e-04
## 25                     Plasma cell 1.265291e-01
## 26            Plasma cell dividing 8.531222e-01
## 27        ROS1+ healthy epithelial 2.054377e-01
## 28              Smooth muscle cell 7.583719e-01
## 29                      T cell CD4 1.968790e-01
## 30             T cell CD4 dividing 2.779924e-01
## 31            T cell CD8 activated 2.663116e-03
## 32             T cell CD8 dividing 6.752865e-03
## 33                T cell CD8 naive 3.254295e-02
## 34 T cell CD8 terminally exhausted 5.993707e-02
## 35                  T cell NK-like 3.822932e-02
## 36               T cell regulatory 7.855282e-02
## 37           Transitional Club/AT2 2.401298e-01
p_values_df$p.adj <- p.adjust(p_values_df$P_Value, method = "BH")
selected_cell_types <- p_values_df[p_values_df$p.adj < 0.05,]
library(reactable)
reactable(selected_cell_types)
library(survival)
library(survminer)
adeno <- readRDS("~/final_results/14_02_2025_deconvolution_adeno.rds")
deconvolution_adeno_scaled <- adeno / rowSums(adeno) * 100
luad_clinic_for_bulk <- readRDS("~/final_results/luad_clinic_for_bulk.rds")
signature_adeno_matrix <- readRDS("~/final_results/14_02_2025_signature_adeno_matrix.rds")
data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))

data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", 
                         variables = c("Alveolar_cell_type_1"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
data$Alveolar_cell_type_1 <- ifelse(data$Alveolar_cell_type_1 > cutpoint, "HIGH", "LOW")
fit_Alveolar_cell_type_1 <- survfit(Surv(overall_survival, deceased) ~ Alveolar_cell_type_1, data = data)
ggsurvplot(fit_Alveolar_cell_type_1, 
           data, 
           pval = TRUE,   
           risk.table = TRUE)  

data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))
colnames(data) <- gsub("-", "_", colnames(data))
coxa <- coxph(Surv(overall_survival, deceased) ~ Alveolar_cell_type_1, data = data) 
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ Alveolar_cell_type_1, 
##     data = data)
## 
##                         coef exp(coef) se(coef)     z      p
## Alveolar_cell_type_1 -0.3191    0.7268   0.1783 -1.79 0.0735
## 
## Likelihood ratio test=3.73  on 1 df, p=0.05351
## n= 91, number of events= 91 
##    (152 observations deleted due to missingness)
library(survival)
library(reactable)
signature_adeno_matrix <- as.data.frame(signature_adeno_matrix)
Alveolar_cell_type_1_exp <- signature_adeno_matrix$`Alveolar cell type 1`
other_cells_exp <- rowMeans(signature_adeno_matrix[, colnames(signature_adeno_matrix) != "Alveolar_cell_type_1"])
Alveolar_cell_type_1_luad_filtered_genes <- signature_adeno_matrix[(Alveolar_cell_type_1_exp - other_cells_exp) / other_cells_exp > 0.90, ]
ad_bulk <- readRDS("~/final_results/luad_bulk.rds")
Alveolar_cell_type_1 <- intersect(rownames(Alveolar_cell_type_1_luad_filtered_genes), rownames(ad_bulk))
Alveolar_cell_type_1 <- ad_bulk[Alveolar_cell_type_1,]
Alveolar_cell_type_1 <- t(Alveolar_cell_type_1)
Alveolar_cell_type_1 <- as.data.frame(Alveolar_cell_type_1)
luad_BULK_CLINIC  <- readRDS("~/final_results/luad_clinic_for_bulk.rds")
luad_BULK_CLINIC <- as.data.frame(luad_BULK_CLINIC)
select_col <- c("overall_survival", "vital_status")
clinic <- luad_BULK_CLINIC[,select_col]
data <- cbind(clinic, Alveolar_cell_type_1)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
library(survival)
exclude_cols <- c("overall_survival", "vital_status", "deceased")
gene_cols <- setdiff(colnames(data), exclude_cols)
cox_results <- list()
for (gene in gene_cols) {
  formula <- as.formula(paste("Surv(overall_survival, deceased) ~", gene))
  cox_model <- coxph(formula, data = data)
  cox_results[[gene]] <- summary(cox_model)
}

genes <- c()
coeffs <- c()
for (gene in names(cox_results)) {
  coef_value <- cox_results[[gene]]$coefficients[1, "coef"] 
  
  if (!is.na(coef_value) && coef_value < 0) {  
    genes <- c(genes, gene)
    coeffs <- c(coeffs, coef_value) 
  }
}

library(biomaRt)
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_annotation <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), 
                         filters = "ensembl_gene_id", 
                         values = genes, 
                         mart = ensembl)
ort <- intersect(genes, rownames(signature_adeno_matrix))
sig <- signature_adeno_matrix[ort, ]

Alveolar_cell_type_1_gene_df <- data.frame(
  ENSG_ID = genes,   
  Gene_Name = gene_annotation$hgnc_symbol,  
  Coefficient = coeffs,
  Cell_Type = "Alveolar_cell_type_1"
)
hazard_ratios <- sapply(genes, function(gene) {
  if (gene %in% names(cox_results)) {
    return(cox_results[[gene]]$coefficients[1, "exp(coef)"]) 
  } else {
    return(NA)  
  }
})

Alveolar_cell_type_1_gene_df$Hazard_Ratio <- hazard_ratios
Alveolar_cell_type_1_gene_df$exp.level <- sig$`Alveolar cell type 1`

reactable(Alveolar_cell_type_1_gene_df)
library(survival)
library(survminer)
adeno <- readRDS("~/final_results/14_02_2025_deconvolution_adeno.rds")
deconvolution_adeno_scaled <- adeno / rowSums(adeno) * 100
luad_clinic_for_bulk <- readRDS("~/final_results/luad_clinic_for_bulk.rds")
signature_adeno_matrix <- readRDS("~/final_results/14_02_2025_signature_adeno_matrix.rds")
data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))

data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", 
                         variables = c("Alveolar_cell_type_2"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
data$Alveolar_cell_type_2 <- ifelse(data$Alveolar_cell_type_2 > cutpoint, "HIGH", "LOW")
fit_Alveolar_cell_type_2 <- survfit(Surv(overall_survival, deceased) ~ Alveolar_cell_type_2, data = data)
ggsurvplot(fit_Alveolar_cell_type_2, 
           data, 
           pval = TRUE,   
           risk.table = TRUE)  

data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))
colnames(data) <- gsub("-", "_", colnames(data))
coxa <- coxph(Surv(overall_survival, deceased) ~ Alveolar_cell_type_2, data = data) 
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ Alveolar_cell_type_2, 
##     data = data)
## 
##                          coef exp(coef) se(coef)      z     p
## Alveolar_cell_type_2 -0.04119   0.95965  0.02835 -1.453 0.146
## 
## Likelihood ratio test=2.42  on 1 df, p=0.12
## n= 91, number of events= 91 
##    (152 observations deleted due to missingness)
library(survival)
library(reactable)
signature_adeno_matrix <- as.data.frame(signature_adeno_matrix)
Alveolar_cell_type_2_exp <- signature_adeno_matrix$`Alveolar cell type 2`
other_cells_exp <- rowMeans(signature_adeno_matrix[, colnames(signature_adeno_matrix) != "Alveolar_cell_type_2"])
Alveolar_cell_type_2_luad_filtered_genes <- signature_adeno_matrix[(Alveolar_cell_type_2_exp - other_cells_exp) / other_cells_exp > 0.90, ]
ad_bulk <- readRDS("~/final_results/luad_bulk.rds")
Alveolar_cell_type_2 <- intersect(rownames(Alveolar_cell_type_2_luad_filtered_genes), rownames(ad_bulk))
Alveolar_cell_type_2 <- ad_bulk[Alveolar_cell_type_2,]
Alveolar_cell_type_2 <- t(Alveolar_cell_type_2)
Alveolar_cell_type_2 <- as.data.frame(Alveolar_cell_type_2)
luad_BULK_CLINIC <- as.data.frame(luad_BULK_CLINIC)
select_col <- c("overall_survival", "vital_status")
clinic <- luad_BULK_CLINIC[,select_col]
data <- cbind(clinic, Alveolar_cell_type_2)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
library(survival)
exclude_cols <- c("overall_survival", "vital_status", "deceased")
gene_cols <- setdiff(colnames(data), exclude_cols)
cox_results <- list()
for (gene in gene_cols) {
  formula <- as.formula(paste("Surv(overall_survival, deceased) ~", gene))
  cox_model <- coxph(formula, data = data)
  cox_results[[gene]] <- summary(cox_model)
}

genes <- c()
coeffs <- c()
for (gene in names(cox_results)) {
  coef_value <- cox_results[[gene]]$coefficients[1, "coef"] 
  
  if (!is.na(coef_value) && coef_value < 0) {  
    genes <- c(genes, gene)
    coeffs <- c(coeffs, coef_value) 
  }
}

library(biomaRt)
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_annotation <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), 
                         filters = "ensembl_gene_id", 
                         values = genes, 
                         mart = ensembl)
ort <- intersect(genes, rownames(signature_adeno_matrix))
sig <- signature_adeno_matrix[ort, ]

Alveolar_cell_type_2_gene_df <- data.frame(
  ENSG_ID = genes,   
  Gene_Name = gene_annotation$hgnc_symbol,  
  Coefficient = coeffs,
  Cell_Type = "Alveolar_cell_type_2"
)
hazard_ratios <- sapply(genes, function(gene) {
  if (gene %in% names(cox_results)) {
    return(cox_results[[gene]]$coefficients[1, "exp(coef)"]) 
  } else {
    return(NA)  
  }
})

Alveolar_cell_type_2_gene_df$Hazard_Ratio <- hazard_ratios
Alveolar_cell_type_2_gene_df$exp.level <- sig$`Alveolar cell type 2`

reactable(Alveolar_cell_type_2_gene_df)
library(survival)
library(survminer)
adeno <- readRDS("~/final_results/14_02_2025_deconvolution_adeno.rds")
deconvolution_adeno_scaled <- adeno / rowSums(adeno) * 100
luad_clinic_for_bulk <- readRDS("~/final_results/luad_clinic_for_bulk.rds")
signature_adeno_matrix <- readRDS("~/final_results/14_02_2025_signature_adeno_matrix.rds")
data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))

data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", 
                         variables = c("Macrophage"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
data$Macrophage <- ifelse(data$Macrophage > cutpoint, "HIGH", "LOW")
fit_Macrophage <- survfit(Surv(overall_survival, deceased) ~ Macrophage, data = data)
ggsurvplot(fit_Macrophage, 
           data, 
           pval = TRUE,   
           risk.table = TRUE)  

data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))
colnames(data) <- gsub("-", "_", colnames(data))
coxa <- coxph(Surv(overall_survival, deceased) ~ Macrophage, data = data) 
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ Macrophage, 
##     data = data)
## 
##                coef exp(coef) se(coef)      z     p
## Macrophage -0.04942   0.95178  0.03187 -1.551 0.121
## 
## Likelihood ratio test=2.54  on 1 df, p=0.1113
## n= 91, number of events= 91 
##    (152 observations deleted due to missingness)
library(survival)
library(reactable)
signature_adeno_matrix <- as.data.frame(signature_adeno_matrix)
Macrophage_exp <- signature_adeno_matrix$Macrophage
other_cells_exp <- rowMeans(signature_adeno_matrix[, colnames(signature_adeno_matrix) != "Macrophage"])
Macrophage_luad_filtered_genes <- signature_adeno_matrix[(Macrophage_exp - other_cells_exp) / other_cells_exp > 0.90, ]
ad_bulk <- readRDS("~/final_results/luad_bulk.rds")
Macrophage <- intersect(rownames(Macrophage_luad_filtered_genes), rownames(ad_bulk))
Macrophage <- ad_bulk[Macrophage,]
Macrophage <- t(Macrophage)
Macrophage <- as.data.frame(Macrophage)
luad_BULK_CLINIC <- as.data.frame(luad_BULK_CLINIC)
select_col <- c("overall_survival", "vital_status")
clinic <- luad_BULK_CLINIC[,select_col]
data <- cbind(clinic, Macrophage)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
library(survival)
exclude_cols <- c("overall_survival", "vital_status", "deceased")
gene_cols <- setdiff(colnames(data), exclude_cols)
cox_results <- list()
for (gene in gene_cols) {
  formula <- as.formula(paste("Surv(overall_survival, deceased) ~", gene))
  cox_model <- coxph(formula, data = data)
  cox_results[[gene]] <- summary(cox_model)
}

genes <- c()
coeffs <- c()
for (gene in names(cox_results)) {
  coef_value <- cox_results[[gene]]$coefficients[1, "coef"] 
  
  if (!is.na(coef_value) && coef_value < 0) {  
    genes <- c(genes, gene)
    coeffs <- c(coeffs, coef_value) 
  }
}

library(biomaRt)
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_annotation <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), 
                         filters = "ensembl_gene_id", 
                         values = genes, 
                         mart = ensembl)
ort <- intersect(genes, rownames(signature_adeno_matrix))
sig <- signature_adeno_matrix[ort, ]

Macrophage_gene_df <- data.frame(
  ENSG_ID = genes,   
  Gene_Name = gene_annotation$hgnc_symbol,  
  Coefficient = coeffs,
  Cell_Type = "Macrophage"
)
hazard_ratios <- sapply(genes, function(gene) {
  if (gene %in% names(cox_results)) {
    return(cox_results[[gene]]$coefficients[1, "exp(coef)"]) 
  } else {
    return(NA)  
  }
})

Macrophage_gene_df$Hazard_Ratio <- hazard_ratios
Macrophage_gene_df$exp.level <- sig$Macrophage

reactable(Macrophage_gene_df)
library(survival)
library(survminer)
adeno <- readRDS("~/final_results/14_02_2025_deconvolution_adeno.rds")
deconvolution_adeno_scaled <- adeno / rowSums(adeno) * 100
luad_clinic_for_bulk <- readRDS("~/final_results/luad_clinic_for_bulk.rds")
signature_adeno_matrix <- readRDS("~/final_results/14_02_2025_signature_adeno_matrix.rds")
data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))

data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", 
                         variables = c("Neutrophils"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
data$Neutrophils <- ifelse(data$Neutrophils > cutpoint, "HIGH", "LOW")
fit_Neutrophils <- survfit(Surv(overall_survival, deceased) ~ Neutrophils, data = data)
ggsurvplot(fit_Neutrophils, 
           data, 
           pval = TRUE,   
           risk.table = TRUE)  

data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))
colnames(data) <- gsub("-", "_", colnames(data))
coxa <- coxph(Surv(overall_survival, deceased) ~ Neutrophils, data = data) 
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ Neutrophils, 
##     data = data)
## 
##               coef exp(coef) se(coef)     z     p
## Neutrophils 0.1280    1.1365   0.1633 0.784 0.433
## 
## Likelihood ratio test=0.55  on 1 df, p=0.4568
## n= 91, number of events= 91 
##    (152 observations deleted due to missingness)
library(survival)
library(reactable)
signature_adeno_matrix <- as.data.frame(signature_adeno_matrix)
Neutrophils_exp <- signature_adeno_matrix$Neutrophils
other_cells_exp <- rowMeans(signature_adeno_matrix[, colnames(signature_adeno_matrix) != "Neutrophils"])
Neutrophils_luad_filtered_genes <- signature_adeno_matrix[(Neutrophils_exp - other_cells_exp) / other_cells_exp > 0.90, ]
ad_bulk <- readRDS("~/final_results/luad_bulk.rds")
Neutrophils <- intersect(rownames(Neutrophils_luad_filtered_genes), rownames(ad_bulk))
Neutrophils <- ad_bulk[Neutrophils,]
Neutrophils <- t(Neutrophils)
Neutrophils <- as.data.frame(Neutrophils)
luad_BULK_CLINIC <- as.data.frame(luad_BULK_CLINIC)
select_col <- c("overall_survival", "vital_status")
clinic <- luad_BULK_CLINIC[,select_col]
data <- cbind(clinic, Neutrophils)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
library(survival)
exclude_cols <- c("overall_survival", "vital_status", "deceased")
gene_cols <- setdiff(colnames(data), exclude_cols)
cox_results <- list()
for (gene in gene_cols) {
  formula <- as.formula(paste("Surv(overall_survival, deceased) ~", gene))
  cox_model <- coxph(formula, data = data)
  cox_results[[gene]] <- summary(cox_model)
}

genes <- c()
coeffs <- c()
for (gene in names(cox_results)) {
  coef_value <- cox_results[[gene]]$coefficients[1, "coef"] 
  
  if (!is.na(coef_value) && coef_value < 0) {  
    genes <- c(genes, gene)
    coeffs <- c(coeffs, coef_value) 
  }
}

library(biomaRt)
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_annotation <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), 
                         filters = "ensembl_gene_id", 
                         values = genes, 
                         mart = ensembl)
ort <- intersect(genes, rownames(signature_adeno_matrix))
sig <- signature_adeno_matrix[ort, ]

Neutrophils_gene_df <- data.frame(
  ENSG_ID = genes,   
  Gene_Name = gene_annotation$hgnc_symbol,  
  Coefficient = coeffs,
  Cell_Type = "Neutrophils"
)
hazard_ratios <- sapply(genes, function(gene) {
  if (gene %in% names(cox_results)) {
    return(cox_results[[gene]]$coefficients[1, "exp(coef)"]) 
  } else {
    return(NA)  
  }
})

Neutrophils_gene_df$Hazard_Ratio <- hazard_ratios
Neutrophils_gene_df$exp.level <- sig$Neutrophils

reactable(Neutrophils_gene_df)
library(survival)
library(survminer)
adeno <- readRDS("~/final_results/14_02_2025_deconvolution_adeno.rds")
deconvolution_adeno_scaled <- adeno / rowSums(adeno) * 100
luad_clinic_for_bulk <- readRDS("~/final_results/luad_clinic_for_bulk.rds")
signature_adeno_matrix <- readRDS("~/final_results/14_02_2025_signature_adeno_matrix.rds")
data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))

data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", 
                         variables = c("Pericyte"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
data$Pericyte <- ifelse(data$Pericyte > cutpoint, "HIGH", "LOW")
fit_Pericyte <- survfit(Surv(overall_survival, deceased) ~ Pericyte, data = data)
ggsurvplot(fit_Pericyte, 
           data, 
           pval = TRUE,   
           risk.table = TRUE)  

data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))
colnames(data) <- gsub("-", "_", colnames(data))
coxa <- coxph(Surv(overall_survival, deceased) ~ Pericyte, data = data) 
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ Pericyte, 
##     data = data)
## 
##             coef exp(coef) se(coef)      z     p
## Pericyte -0.2611    0.7702   0.1617 -1.615 0.106
## 
## Likelihood ratio test=2.67  on 1 df, p=0.1023
## n= 91, number of events= 91 
##    (152 observations deleted due to missingness)
library(survival)
library(reactable)
signature_adeno_matrix <- as.data.frame(signature_adeno_matrix)
Pericyte_exp <- signature_adeno_matrix$Pericyte
other_cells_exp <- rowMeans(signature_adeno_matrix[, colnames(signature_adeno_matrix) != "Pericyte"])
Pericyte_luad_filtered_genes <- signature_adeno_matrix[(Pericyte_exp - other_cells_exp) / other_cells_exp > 0.90, ]
ad_bulk <- readRDS("~/final_results/luad_bulk.rds")
Pericyte <- intersect(rownames(Pericyte_luad_filtered_genes), rownames(ad_bulk))
Pericyte <- ad_bulk[Pericyte,]
Pericyte <- t(Pericyte)
Pericyte <- as.data.frame(Pericyte)
luad_BULK_CLINIC <- as.data.frame(luad_BULK_CLINIC)
select_col <- c("overall_survival", "vital_status")
clinic <- luad_BULK_CLINIC[,select_col]
data <- cbind(clinic, Pericyte)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
library(survival)
exclude_cols <- c("overall_survival", "vital_status", "deceased")
gene_cols <- setdiff(colnames(data), exclude_cols)
cox_results <- list()
for (gene in gene_cols) {
  formula <- as.formula(paste("Surv(overall_survival, deceased) ~", gene))
  cox_model <- coxph(formula, data = data)
  cox_results[[gene]] <- summary(cox_model)
}

genes <- c()
coeffs <- c()
for (gene in names(cox_results)) {
  coef_value <- cox_results[[gene]]$coefficients[1, "coef"] 
  
  if (!is.na(coef_value) && coef_value < 0) {  
    genes <- c(genes, gene)
    coeffs <- c(coeffs, coef_value) 
  }
}

library(biomaRt)
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_annotation <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), 
                         filters = "ensembl_gene_id", 
                         values = genes, 
                         mart = ensembl)
ort <- intersect(genes, rownames(signature_adeno_matrix))
sig <- signature_adeno_matrix[ort, ]

Pericyte_gene_df <- data.frame(
  ENSG_ID = genes,   
  Gene_Name = gene_annotation$hgnc_symbol,  
  Coefficient = coeffs,
  Cell_Type = "Pericyte"
)
hazard_ratios <- sapply(genes, function(gene) {
  if (gene %in% names(cox_results)) {
    return(cox_results[[gene]]$coefficients[1, "exp(coef)"]) 
  } else {
    return(NA)  
  }
})

Pericyte_gene_df$Hazard_Ratio <- hazard_ratios
Pericyte_gene_df$exp.level <- sig$Pericyte

reactable(Pericyte_gene_df)
library(survival)
library(survminer)
adeno <- readRDS("~/final_results/14_02_2025_deconvolution_adeno.rds")
deconvolution_adeno_scaled <- adeno / rowSums(adeno) * 100
luad_clinic_for_bulk <- readRDS("~/final_results/luad_clinic_for_bulk.rds")
signature_adeno_matrix <- readRDS("~/final_results/14_02_2025_signature_adeno_matrix.rds")
data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))

data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", 
                         variables = c("T_cell_CD8_activated"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
data$T_cell_CD8_activated <- ifelse(data$T_cell_CD8_activated > cutpoint, "HIGH", "LOW")
fit_T_cell_CD8_activated <- survfit(Surv(overall_survival, deceased) ~ T_cell_CD8_activated, data = data)
ggsurvplot(fit_T_cell_CD8_activated, 
           data, 
           pval = TRUE,   
           risk.table = TRUE)  

data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))
colnames(data) <- gsub("-", "_", colnames(data))
coxa <- coxph(Surv(overall_survival, deceased) ~ T_cell_CD8_activated, data = data) 
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ T_cell_CD8_activated, 
##     data = data)
## 
##                         coef exp(coef) se(coef)     z     p
## T_cell_CD8_activated 0.09551   1.10022  0.07094 1.346 0.178
## 
## Likelihood ratio test=1.52  on 1 df, p=0.2175
## n= 91, number of events= 91 
##    (152 observations deleted due to missingness)
library(survival)
library(reactable)
signature_adeno_matrix <- as.data.frame(signature_adeno_matrix)
T_cell_CD8_activated_exp <- signature_adeno_matrix$`T cell CD8 activated`
other_cells_exp <- rowMeans(signature_adeno_matrix[, colnames(signature_adeno_matrix) != "T_cell_CD8_activated"])
T_cell_CD8_activated_luad_filtered_genes <- signature_adeno_matrix[(T_cell_CD8_activated_exp - other_cells_exp) / other_cells_exp > 0.90, ]
ad_bulk <- readRDS("~/final_results/luad_bulk.rds")
T_cell_CD8_activated <- intersect(rownames(T_cell_CD8_activated_luad_filtered_genes), rownames(ad_bulk))
T_cell_CD8_activated <- ad_bulk[T_cell_CD8_activated,]
T_cell_CD8_activated <- t(T_cell_CD8_activated)
T_cell_CD8_activated <- as.data.frame(T_cell_CD8_activated)
luad_BULK_CLINIC <- readRDS("~/final_results/luad_clinic_for_bulk.rds")
luad_BULK_CLINIC <- as.data.frame(luad_BULK_CLINIC)
select_col <- c("overall_survival", "vital_status")
clinic <- luad_BULK_CLINIC[,select_col]
data <- cbind(clinic, T_cell_CD8_activated)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
library(survival)
exclude_cols <- c("overall_survival", "vital_status", "deceased")
gene_cols <- setdiff(colnames(data), exclude_cols)
cox_results <- list()
for (gene in gene_cols) {
  formula <- as.formula(paste("Surv(overall_survival, deceased) ~", gene))
  cox_model <- coxph(formula, data = data)
  cox_results[[gene]] <- summary(cox_model)
}

genes <- c()
coeffs <- c()
for (gene in names(cox_results)) {
  coef_value <- cox_results[[gene]]$coefficients[1, "coef"] 
  
  if (!is.na(coef_value) && coef_value > 0) {  
    genes <- c(genes, gene)
    coeffs <- c(coeffs, coef_value) 
  }
}

library(biomaRt)
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_annotation <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), 
                         filters = "ensembl_gene_id", 
                         values = genes, 
                         mart = ensembl)
ort <- intersect(genes, rownames(signature_adeno_matrix))
sig <- signature_adeno_matrix[ort, ]

T_cell_CD8_activated_gene_df <- data.frame(
  ENSG_ID = genes,   
  Gene_Name = gene_annotation$hgnc_symbol,  
  Coefficient = coeffs,
  Cell_Type = "T_cell_CD8_activated"
)
hazard_ratios <- sapply(genes, function(gene) {
  if (gene %in% names(cox_results)) {
    return(cox_results[[gene]]$coefficients[1, "exp(coef)"]) 
  } else {
    return(NA)  
  }
})

T_cell_CD8_activated_gene_df$Hazard_Ratio <- hazard_ratios
T_cell_CD8_activated_gene_df$exp.level <- sig$`T cell CD8 activated`

reactable(T_cell_CD8_activated_gene_df)
library(survival)
library(survminer)
adeno <- readRDS("~/final_results/14_02_2025_deconvolution_adeno.rds")
deconvolution_adeno_scaled <- adeno / rowSums(adeno) * 100
luad_clinic_for_bulk <- readRDS("~/final_results/luad_clinic_for_bulk.rds")
signature_adeno_matrix <- readRDS("~/final_results/14_02_2025_signature_adeno_matrix.rds")
data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))

data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", 
                         variables = c("T_cell_CD8_dividing"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
data$T_cell_CD8_dividing <- ifelse(data$T_cell_CD8_dividing > cutpoint, "HIGH", "LOW")
fit_T_cell_CD8_dividing <- survfit(Surv(overall_survival, deceased) ~ T_cell_CD8_dividing, data = data)
ggsurvplot(fit_T_cell_CD8_dividing, 
           data, 
           pval = TRUE,   
           risk.table = TRUE)  

data <- cbind(luad_clinic_for_bulk, deconvolution_adeno_scaled)
colnames(data) <- gsub(" ", "_", colnames(data))
colnames(data) <- gsub("-", "_", colnames(data))
coxa <- coxph(Surv(overall_survival, deceased) ~ T_cell_CD8_dividing, data = data) 
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ T_cell_CD8_dividing, 
##     data = data)
## 
##                        coef exp(coef) se(coef)    z      p
## T_cell_CD8_dividing 0.18378   1.20176  0.08876 2.07 0.0384
## 
## Likelihood ratio test=3.98  on 1 df, p=0.04618
## n= 91, number of events= 91 
##    (152 observations deleted due to missingness)
library(survival)
library(reactable)
signature_adeno_matrix <- as.data.frame(signature_adeno_matrix)
T_cell_CD8_dividing_exp <- signature_adeno_matrix$`T cell CD8 dividing`
other_cells_exp <- rowMeans(signature_adeno_matrix[, colnames(signature_adeno_matrix) != "T_cell_CD8_dividing"])
T_cell_CD8_dividing_luad_filtered_genes <- signature_adeno_matrix[(T_cell_CD8_dividing_exp - other_cells_exp) / other_cells_exp > 0.90, ]
ad_bulk <- readRDS("~/final_results/luad_bulk.rds")
T_cell_CD8_dividing <- intersect(rownames(T_cell_CD8_dividing_luad_filtered_genes), rownames(ad_bulk))
T_cell_CD8_dividing <- ad_bulk[T_cell_CD8_dividing,]
T_cell_CD8_dividing <- t(T_cell_CD8_dividing)
T_cell_CD8_dividing <- as.data.frame(T_cell_CD8_dividing)
luad_BULK_CLINIC <- readRDS("~/final_results/luad_clinic_for_bulk.rds")
luad_BULK_CLINIC <- as.data.frame(luad_BULK_CLINIC)
select_col <- c("overall_survival", "vital_status")
clinic <- luad_BULK_CLINIC[,select_col]
data <- cbind(clinic, T_cell_CD8_dividing)
data$deceased <- ifelse(data$vital_status == "Alive", FALSE, TRUE)
library(survival)
exclude_cols <- c("overall_survival", "vital_status", "deceased")
gene_cols <- setdiff(colnames(data), exclude_cols)
cox_results <- list()
for (gene in gene_cols) {
  formula <- as.formula(paste("Surv(overall_survival, deceased) ~", gene))
  cox_model <- coxph(formula, data = data)
  cox_results[[gene]] <- summary(cox_model)
}

genes <- c()
coeffs <- c()
for (gene in names(cox_results)) {
  coef_value <- cox_results[[gene]]$coefficients[1, "coef"] 
  
  if (!is.na(coef_value) && coef_value > 0) {  
    genes <- c(genes, gene)
    coeffs <- c(coeffs, coef_value) 
  }
}

library(biomaRt)
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_annotation <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), 
                         filters = "ensembl_gene_id", 
                         values = genes, 
                         mart = ensembl)
ort <- intersect(genes, rownames(signature_adeno_matrix))
sig <- signature_adeno_matrix[ort, ]

T_cell_CD8_dividing_gene_df <- data.frame(
  ENSG_ID = genes,   
  Gene_Name = gene_annotation$hgnc_symbol,  
  Coefficient = coeffs,
  Cell_Type = "T_cell_CD8_dividing"
)
hazard_ratios <- sapply(genes, function(gene) {
  if (gene %in% names(cox_results)) {
    return(cox_results[[gene]]$coefficients[1, "exp(coef)"]) 
  } else {
    return(NA)  
  }
})

T_cell_CD8_dividing_gene_df$Hazard_Ratio <- hazard_ratios
T_cell_CD8_dividing_gene_df$exp.level <- sig$`T cell CD8 dividing`

reactable(T_cell_CD8_dividing_gene_df)
library(survival)
library(survminer)
library(ggplot2)
library(biomaRt)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:biomaRt':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
df_list <- list(Macrophage_gene_df, Neutrophils_gene_df, T_cell_CD8_activated_gene_df, T_cell_CD8_dividing_gene_df, Pericyte_gene_df)
df_names <- c("Macrophage", "Neutrophils", "T_cell_CD8_activated", "T_cell_CD8_dividing", "Pericyte")

final_genes <- unique(unlist(lapply(df_list, function(df) {
  if (!is.null(df)) {
    top5_genes <- df %>%
      arrange(desc(exp.level)) %>%
      head(5)
    
  }
})))

filtered_df_list <- lapply(df_list, function(df) {
  if (!is.null(df)) {
    df %>% filter(ENSG_ID %in% final_genes)
  } else {
    return(NULL)
  }
})

final <- unique(unlist(lapply(filtered_df_list, function(df) {
  if (!is.null(df)) {
    top3_genes <- df %>%
      arrange(desc(Hazard_Ratio)) %>%
      head(3)
    
  }
})))


ensembl_ids <- final

adeno <- readRDS("~/final_results/luad_bulk.rds") |> as.data.frame() |> t() |> as.data.frame()
luad_clinic_for_bulk <- readRDS("~/final_results/luad_clinic_for_bulk.rds")

data <- cbind(luad_clinic_for_bulk, adeno)
data$deceased <- ifelse(data$vital_status == "Alive", 0, 1)

mart <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", version = 105)
gene_map <- getBM(
  attributes = c("ensembl_gene_id", "external_gene_name"),
  filters = "ensembl_gene_id",
  values = ensembl_ids,
  mart = mart
)

colnames(data)[colnames(data) %in% gene_map$ensembl_gene_id] <- gene_map$external_gene_name

data <- cbind(luad_clinic_for_bulk, data[, gene_map$external_gene_name])
data$overall_survival <- as.numeric(data$overall_survival)

univ_cox <- function(variable) {
  formula <- reformulate(variable, response = "Surv(overall_survival, deceased)")
  cox_model <- coxph(formula, data = data)
  cox_summary <- summary(cox_model)

  c(
    coef = cox_summary$coefficients[1],  
    HR = exp(cox_summary$coefficients[1]), 
    p_value = cox_summary$coefficients[5], 
    CI_lower = exp(cox_summary$conf.int[ ,3]), 
    CI_upper = exp(cox_summary$conf.int[ ,4])
  )
}

colnames(data) <- gsub("-", "_", colnames(data))

univ_results <- lapply(colnames(data[, 8:ncol(data)]), univ_cox) 
univ_results_df <- do.call(rbind, univ_results) |> as.data.frame()
rownames(univ_results_df) <- colnames(data[, 8:ncol(data)])



gen_expression <- data[, 8:ncol(data)]
coef_values <- univ_results_df$coef
risk_scores <- apply(gen_expression, 1, function(x) sum(x * coef_values))
risk_scores <- as.data.frame(risk_scores)
data$risk_scores <- risk_scores$risk_scores

res.cut <- surv_cutpoint(data, time = "overall_survival", event = "deceased", variables = c("risk_scores"))
cutpoint <- res.cut[["cutpoint"]][["cutpoint"]]
data$risk_scores <- ifelse(data$risk_scores > cutpoint, "HIGH", "LOW")

fit_risk_scores <- survfit(Surv(overall_survival, deceased) ~ risk_scores, data = data)
ggsurvplot(fit_risk_scores, data, pval = TRUE, risk.table = TRUE)

luad_final_genes <- readRDS("~/final_results/luad_final_genes.rds")
luad_final_genes <- luad_final_genes[rownames(univ_results_df),]
univ_results_df$cell_type <- luad_final_genes$cell_type
coxa <- coxph(Surv(overall_survival, deceased) ~ risk_scores, data = data)
coxa
## Call:
## coxph(formula = Surv(overall_survival, deceased) ~ risk_scores, 
##     data = data)
## 
##                   coef exp(coef) se(coef)      z       p
## risk_scoresLOW -0.7028    0.4952   0.2256 -3.115 0.00184
## 
## Likelihood ratio test=9.11  on 1 df, p=0.002547
## n= 91, number of events= 91 
##    (152 observations deleted due to missingness)
saveRDS(univ_results_df, "~/final_results/06-03-2025_luad_final_genes.rds")
reactable(univ_results_df)