#Differential Expression Analysis
All_samples_Merged
An object of class Seurat
62900 features across 49305 samples within 6 assays
Active assay: SCT (26176 features, 3000 variable features)
3 layers present: counts, data, scale.data
5 other assays present: RNA, ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
5 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap, harmony
DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T)
#Differential Expression Analysis
DefaultAssay(All_samples_Merged) <- "SCT"
Idents(All_samples_Merged) <- "seurat_clusters"
# Patient 1 vs Patient 2
p1_vs_p2 <- FindMarkers(All_samples_Merged,
ident.1 = c(5, 1, 9), # P1 clusters
ident.2 = c(2, 6, 8), # P2 clusters
assay = "SCT")
write.csv(p1_vs_p2, "comparison_P1_vs_P2.csv")
# Create volcano plot for P1 vs P2
volcano_p1_vs_p2 <- EnhancedVolcano(p1_vs_p2,
lab = rownames(p1_vs_p2),
x = 'avg_log2FC',
y = 'p_val_adj',
title = 'P1 vs P2',
xlab = bquote(~Log[2]~ 'fold change'),
pCutoff = 0.05,
FCcutoff = 1.5,
pointSize = 3.0,
labSize = 5.0,
boxedLabels = TRUE,
colAlpha = 0.5,
legendPosition = 'right',
legendLabSize = 10,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.5,
colConnectors = 'grey50',
arrowheads = FALSE,
max.overlaps = 30)
Warning: One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...
print(volcano_p1_vs_p2)
png("volcano_P1_vs_P2.png", width = 12, height = 10, units = "in", res = 300)
print(volcano_p1_vs_p2)
dev.off()
png
2
volcano2_p1_vs_p2 <- EnhancedVolcano(p1_vs_p2,
lab = rownames(p1_vs_p2),
x = "avg_log2FC",
y = "p_val_adj",
selectLab = c('EPCAM', 'KIR3DL2', 'FOXM1', 'TWIST1', 'TNFSF9',
'CD80', 'FOS','PTPN6','NCR1','NCR2',
'PCLAF', 'KIR3DL1', 'IL4','ITGA6','CCL5',
'IL7R', 'TCF7', 'PTTG1', 'RRM2', 'MKI67', 'CD70',
'IL2RA', 'FCGR3A', 'GNLY', 'FOXP3', 'SELL', 'LEF1',
'CCL17', 'THY1', 'CD27', 'CD28', 'CD7',
# Key Sézary syndrome genes
'PRF1', 'GZMB', 'NCR1', 'NFATC3',
'KLRK1', 'LCK', 'KLRC1', 'KLRC2', 'TNF',
'KIR3DL1','KIR3DL3','KIR3DL4', 'IFNG', 'IFNGR1', 'CD244', 'FASLG'),
title = "P1 vs P2",
subtitle = "Sézary Syndrome Cell Lines",
xlab = bquote(~Log[2]~ 'fold change'),
pCutoff = 0.05,
FCcutoff = log2(1.5),
pointSize = 3.0,
labSize = 4.0,
labFace = 'bold',
boxedLabels = TRUE,
colAlpha = 0.5,
legendPosition = 'right',
legendLabSize = 10,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.5,
colConnectors = 'grey50',
arrowheads = FALSE,
max.overlaps = 30)
Warning: One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...
print(volcano2_p1_vs_p2)
png("volcano2_P1_vs_P2.png", width = 12, height = 10, units = "in", res = 300)
print(volcano2_p1_vs_p2)
dev.off()
png
2
# Display top differentially expressed genes for each comparison
head(p1_vs_p2)
NA
NA
DefaultAssay(All_samples_Merged) <- "SCT"
Idents(All_samples_Merged) <- "seurat_clusters"
# Patient 1 vs Patient 3
p1_vs_p3 <- FindMarkers(All_samples_Merged,
ident.1 = c(5, 1, 9), # P1 clusters
ident.2 = c(4, 0, 7, 11, 12, 13), # P3 clusters
assay = "SCT")
write.csv(p1_vs_p3, "comparison_P1_vs_P3.csv")
# Create volcano plot for P1 vs P3
volcano_p1_vs_p3 <- EnhancedVolcano(p1_vs_p3,
lab = rownames(p1_vs_p3),
x = 'avg_log2FC',
y = 'p_val_adj',
title = 'P1 vs P3',
pCutoff = 0.05,
FCcutoff = 1,
pointSize = 1.5,
labSize = 4.0,
col = c('grey', 'darkgreen', 'blue', 'red'),
colAlpha = 0.5,
legendPosition = 'right',
legendLabSize = 10,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.5)
Warning: One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...
print(volcano_p1_vs_p3)
png("volcano_P1_vs_P3.png", width = 12, height = 10, units = "in", res = 300)
print(volcano_p1_vs_p3)
dev.off()
png
2
volcano2_p1_vs_p3 <- EnhancedVolcano(p1_vs_p3,
lab = rownames(p1_vs_p3),
x = "avg_log2FC",
y = "p_val_adj",
selectLab = c('KIR3DL2','KIR3DL1','KIR3DL3','KIR3DL4', 'TWIST1', 'TNFSF9',
'FOS', 'TCF7','LEF1',
'CD86', 'VCAM1','CCL5',
'CD40', 'CD70',
'IL2RA', 'FCGR3A', 'GNLY', 'FOXP3', 'LEF1',
'CCL17', 'THY1', 'CD27', 'CD28', 'CD7','EPCAM','TOX','IL16','IL21',
# Key Sézary syndrome genes
'PRF1', 'GZMB',
'KLRK1', 'LCK', 'KLRC1', 'KLRC2',
'IFNG', 'IFNGR1', 'FASLG'),
title = "P1 vs P3",
subtitle = "Sézary Syndrome Cell Lines",
xlab = bquote(~Log[2]~ 'fold change'),
pCutoff = 0.05,
FCcutoff = 1.5,
pointSize = 3.0,
labSize = 4.0,
labFace = 'bold',
boxedLabels = TRUE,
colAlpha = 0.5,
legendPosition = 'right',
legendLabSize = 10,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.5,
colConnectors = 'grey50',
arrowheads = FALSE,
max.overlaps = 30)
Warning: One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...
print(volcano2_p1_vs_p3)
png("volcano2_P1_vs_P3.png", width = 12, height = 10, units = "in", res = 300)
print(volcano2_p1_vs_p3)
dev.off()
png
2
# Display top differentially expressed genes for each comparison
head(p1_vs_p3)
NA
NA
DefaultAssay(All_samples_Merged) <- "SCT"
Idents(All_samples_Merged) <- "seurat_clusters"
# Patient 2 vs Patient 3
p2_vs_p3 <- FindMarkers(All_samples_Merged,
ident.1 = c(2, 6, 8), # P2 clusters
ident.2 = c(4, 0, 7, 11, 12, 13), # P3 clusters
assay = "SCT")
write.csv(p2_vs_p3, "comparison_P2_vs_P3.csv")
# Create volcano plot for P2 vs P3
volcano_p2_vs_p3 <- EnhancedVolcano(p2_vs_p3,
lab = rownames(p2_vs_p3),
x = 'avg_log2FC',
y = 'p_val_adj',
title = 'P2 vs P3',
pCutoff = 0.05,
FCcutoff = 1,
pointSize = 1.5,
labSize = 4.0,
col = c('grey', 'darkgreen', 'blue', 'red'),
colAlpha = 0.5,
legendPosition = 'right',
legendLabSize = 10,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.5)
Warning: One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...
print(volcano_p2_vs_p3)
png("volcano_P2_vs_P3.png", width = 12, height = 10, units = "in", res = 300)
print(volcano_p2_vs_p3)
dev.off()
png
2
volcano2_p2_vs_p3 <- EnhancedVolcano(p2_vs_p3,
lab = rownames(p2_vs_p3),
x = "avg_log2FC",
y = "p_val_adj",
selectLab = c('KIR3DL2','KIR3DL1','KIR3DL3','KIR3DL4', 'TWIST1', 'TNFSF9',
'VCAM1','CCL5','CCL23','IL13','IL19', 'TIGIT','JUN','TP53','CD40','CCR10',
'CD40', 'KIT','CD52','CD44','RORC','TIFA',
'FOXP3',
'CCL17', 'THY1', 'CD28', 'CD7','EPCAM','IL16',
# Key Sézary syndrome genes
'KLRK1', 'KLRC1', 'KLRC2',
'IFNG', 'IFNGR1', 'FASLG'),
title = "P2 vs P3",
subtitle = "Sézary Syndrome Cell Lines",
xlab = bquote(~Log[2]~ 'fold change'),
pCutoff = 0.05,
FCcutoff = 1.5,
pointSize = 3.0,
labSize = 4.0,
labFace = 'bold',
boxedLabels = TRUE,
colAlpha = 0.5,
legendPosition = 'right',
legendLabSize = 10,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.5,
colConnectors = 'grey50',
arrowheads = FALSE,
max.overlaps = 30)
Warning: One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...
print(volcano2_p2_vs_p3)
png("volcano2_P2_vs_P3.png", width = 12, height = 10, units = "in", res = 300)
print(volcano2_p2_vs_p3)
dev.off()
png
2
print(volcano_p1_vs_p2)
print(volcano_p1_vs_p3)
print(volcano_p2_vs_p3)
print(volcano2_p1_vs_p2)
print(volcano2_p1_vs_p3)
print(volcano2_p2_vs_p3)
# Display top differentially expressed genes for each comparison
head(p1_vs_p2)
head(p1_vs_p3)
head(p2_vs_p3)
NA
NA
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
perform_go_enrichment <- function(gene_list, gene_universe, title) {
ego <- enrichGO(gene = gene_list,
universe = gene_universe,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "BP",
pAdjustMethod = "BH",
qvalueCutoff = 0.05,
readable = TRUE)
if (nrow(ego@result) == 0) {
warning(paste("No enriched GO terms found for", title))
return(NULL)
}
p <- dotplot(ego, showCategory = 20, title = paste("GO -", title)) +
theme(axis.text.y = element_text(size = 8))
print(p)
png(paste0("GO_enrichment_", gsub(" ", "_", title), ".png"), width = 12, height = 8, units = "in", res = 300)
print(p)
dev.off()
return(ego)
}
perform_kegg_enrichment <- function(gene_list, gene_universe, title) {
# Convert gene symbols to Entrez IDs
entrez_ids <- bitr(gene_list, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
universe_entrez <- bitr(gene_universe, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
print(paste("Number of input genes:", length(gene_list)))
print(paste("Number of input genes mapped to Entrez IDs:", nrow(entrez_ids)))
print(paste("Number of universe genes:", length(gene_universe)))
print(paste("Number of universe genes mapped to Entrez IDs:", nrow(universe_entrez)))
if(nrow(entrez_ids) == 0) {
warning(paste("No genes could be mapped for", title))
return(NULL)
}
tryCatch({
ekegg <- enrichKEGG(gene = entrez_ids$ENTREZID,
universe = universe_entrez$ENTREZID,
organism = 'hsa',
keyType = "kegg",
pvalueCutoff = 0.05,
pAdjustMethod = "BH")
if(nrow(ekegg@result) == 0) {
warning(paste("No enriched KEGG pathways found for", title))
return(NULL)
}
p <- dotplot(ekegg, showCategory = 20, title = paste("KEGG -", title)) +
theme(axis.text.y = element_text(size = 8))
print(p)
png(paste0("KEGG_enrichment_", gsub(" ", "_", title), ".png"), width = 12, height = 8, units = "in", res = 300)
print(p)
dev.off()
return(ekegg)
}, error = function(e) {
warning(paste("Error in KEGG enrichment for", title, ":", e$message))
return(NULL)
})
}
gene_universe <- rownames(All_samples_Merged)
# P1 vs P2 comparison
upregulated_genes_P1vsP2 <- rownames(p1_vs_p2[p1_vs_p2$avg_log2FC > 1.5 & p1_vs_p2$p_val_adj < 0.001, ])
downregulated_genes_P1vsP2 <- rownames(p1_vs_p2[p1_vs_p2$avg_log2FC < -1.5 & p1_vs_p2$p_val_adj < 0.001, ])
go_up_P1vsP2 <- perform_go_enrichment(upregulated_genes_P1vsP2, gene_universe, "Upregulated Genes in P1 vs P2")
go_down_P1vsP2 <- perform_go_enrichment(downregulated_genes_P1vsP2, gene_universe, "Downregulated Genes in P1 vs P2")
kegg_up_P1vsP2 <- perform_kegg_enrichment(upregulated_genes_P1vsP2, gene_universe, "Upregulated Genes in P1 vs P2")
'select()' returned 1:1 mapping between keys and columns
Warning: 13.39% of input gene IDs are fail to map...'select()' returned 1:many mapping between keys and columns
Warning: 28.25% of input gene IDs are fail to map...
[1] "Number of input genes: 1591"
[1] "Number of input genes mapped to Entrez IDs: 1378"
[1] "Number of universe genes: 26176"
[1] "Number of universe genes mapped to Entrez IDs: 18785"
kegg_down_P1vsP2 <- perform_kegg_enrichment(downregulated_genes_P1vsP2, gene_universe, "Downregulated Genes in P1 vs P2")
'select()' returned 1:many mapping between keys and columns
Warning: 18.52% of input gene IDs are fail to map...'select()' returned 1:many mapping between keys and columns
Warning: 28.25% of input gene IDs are fail to map...
[1] "Number of input genes: 1982"
[1] "Number of input genes mapped to Entrez IDs: 1616"
[1] "Number of universe genes: 26176"
[1] "Number of universe genes mapped to Entrez IDs: 18785"
# P1 vs P3 comparison
upregulated_genes_P1vsP3 <- rownames(p1_vs_p3[p1_vs_p3$avg_log2FC > 1.5 & p1_vs_p3$p_val_adj < 0.001, ])
downregulated_genes_P1vsP3 <- rownames(p1_vs_p3[p1_vs_p3$avg_log2FC < -1.5 & p1_vs_p3$p_val_adj < 0.001, ])
go_up_P1vsP3 <- perform_go_enrichment(upregulated_genes_P1vsP3, gene_universe, "Upregulated Genes in P1 vs P3")
go_down_P1vsP3 <- perform_go_enrichment(downregulated_genes_P1vsP3, gene_universe, "Downregulated Genes in P1 vs P3")
kegg_up_P1vsP3 <- perform_kegg_enrichment(upregulated_genes_P1vsP3, gene_universe, "Upregulated Genes in P1 vs P3")
'select()' returned 1:many mapping between keys and columns
Warning: 12.52% of input gene IDs are fail to map...'select()' returned 1:many mapping between keys and columns
Warning: 28.25% of input gene IDs are fail to map...
[1] "Number of input genes: 1366"
[1] "Number of input genes mapped to Entrez IDs: 1196"
[1] "Number of universe genes: 26176"
[1] "Number of universe genes mapped to Entrez IDs: 18785"
kegg_down_P1vsP3 <- perform_kegg_enrichment(downregulated_genes_P1vsP3, gene_universe, "Downregulated Genes in P1 vs P3")
'select()' returned 1:1 mapping between keys and columns
Warning: 17.83% of input gene IDs are fail to map...'select()' returned 1:many mapping between keys and columns
Warning: 28.25% of input gene IDs are fail to map...
[1] "Number of input genes: 1694"
[1] "Number of input genes mapped to Entrez IDs: 1392"
[1] "Number of universe genes: 26176"
[1] "Number of universe genes mapped to Entrez IDs: 18785"
# P2 vs P3 comparison
upregulated_genes_P2vsP3 <- rownames(p2_vs_p3[p2_vs_p3$avg_log2FC > 1.5 & p2_vs_p3$p_val_adj < 0.001, ])
downregulated_genes_P2vsP3 <- rownames(p2_vs_p3[p2_vs_p3$avg_log2FC < -1.5 & p2_vs_p3$p_val_adj < 0.001, ])
go_up_P2vsP3 <- perform_go_enrichment(upregulated_genes_P2vsP3, gene_universe, "Upregulated Genes in P2 vs P3")
go_down_P2vsP3 <- perform_go_enrichment(downregulated_genes_P2vsP3, gene_universe, "Downregulated Genes in P2 vs P3")
kegg_up_P2vsP3 <- perform_kegg_enrichment(upregulated_genes_P2vsP3, gene_universe, "Upregulated Genes in P2 vs P3")
'select()' returned 1:many mapping between keys and columns
Warning: 17.18% of input gene IDs are fail to map...'select()' returned 1:many mapping between keys and columns
Warning: 28.25% of input gene IDs are fail to map...
[1] "Number of input genes: 1269"
[1] "Number of input genes mapped to Entrez IDs: 1053"
[1] "Number of universe genes: 26176"
[1] "Number of universe genes mapped to Entrez IDs: 18785"
kegg_down_P2vsP3 <- perform_kegg_enrichment(downregulated_genes_P2vsP3, gene_universe, "Downregulated Genes in P2 vs P3")
'select()' returned 1:1 mapping between keys and columns
Warning: 18.13% of input gene IDs are fail to map...'select()' returned 1:many mapping between keys and columns
Warning: 28.25% of input gene IDs are fail to map...
[1] "Number of input genes: 1219"
[1] "Number of input genes mapped to Entrez IDs: 998"
[1] "Number of universe genes: 26176"
[1] "Number of universe genes mapped to Entrez IDs: 18785"