#Differential Expression Analysis
#Load Seurat Object L7
load("../../../0-IMP-OBJECTS/Harmony_integrated_All_samples_Merged_with_PBMC10x_with_harmony_clustering.Robj")
All_samples_Merged
An object of class Seurat
64169 features across 59355 samples within 6 assays
Active assay: SCT (27417 features, 3000 variable features)
3 layers present: counts, data, scale.data
5 other assays present: RNA, ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
6 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap, harmony, umap.harmony
# Overview plots on the `umap.harmony` reduction: first grouped by cell line,
# then by the `Harmony_snn_res.0.9` clustering that is used as the identity
# class for the comparisons further down.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",
        label = TRUE, label.box = TRUE)
DimPlot(All_samples_Merged, reduction = "umap.harmony",
        group.by = "Harmony_snn_res.0.9", label = TRUE, label.box = TRUE)
# Differential Expression Analysis ----
# All comparisons run on the SCT assay, with cells grouped by the
# `Harmony_snn_res.0.9` clustering.
DefaultAssay(All_samples_Merged) <- "SCT"
Idents(All_samples_Merged) <- "Harmony_snn_res.0.9"
# P1 vs PBMC-Tcells
# Cells from the listed ident.1 clusters are pooled and tested against the
# pooled ident.2 clusters; per the Seurat documentation a positive avg_log2FC
# means higher expression in ident.1 (here: P1).
P1_vs_PBMC_Tcells <- FindMarkers(All_samples_Merged,
ident.1 = c(3, 8, 10, 18), # P1 clusters
ident.2 = c(0, 5, 14, 24, 20), # PBMC Tcells clusters
assay = "SCT")
# The CSV is written before the `gene` column is added below, so gene symbols
# are stored only as row names in the exported file.
write.csv(P1_vs_PBMC_Tcells, "New_comparison_P1_vs_PBMC_Tcells.csv")
# Convert to data frame and add gene names as a new column
P1_vs_PBMC_Tcells <- as.data.frame(P1_vs_PBMC_Tcells)
P1_vs_PBMC_Tcells$gene <- rownames(P1_vs_PBMC_Tcells)
# Rearrange the columns so that `gene` comes first (optional); row names are
# kept, so later code can still use rownames() to obtain gene symbols.
P1_vs_PBMC_Tcells <- P1_vs_PBMC_Tcells[,
c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")]
# Create volcano plot for P1 vs PBMC-Tcells
volcano_P1_vs_PBMC_Tcells <- EnhancedVolcano(P1_vs_PBMC_Tcells,
lab = rownames(P1_vs_PBMC_Tcells),
x = 'avg_log2FC',
y = 'p_val_adj',
title = 'P1_vs_PBMC_Tcells',
xlab = bquote(~Log[2]~ 'fold change'),
pCutoff = 1e-100,
FCcutoff = 1.5,
pointSize = 3.0,
labSize = 5.0,
boxedLabels = TRUE,
colAlpha = 0.5,
legendPosition = 'right',
legendLabSize = 10,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.5,
colConnectors = 'grey50',
arrowheads = FALSE,
max.overlaps = 30)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...
print(volcano_P1_vs_PBMC_Tcells)
png("volcano_P1_vs_PBMC_Tcells.png", width = 12, height = 10, units = "in", res = 300)
print(volcano_P1_vs_PBMC_Tcells)
dev.off()
png
2
volcano2_P1_vs_PBMC_Tcells <- EnhancedVolcano(P1_vs_PBMC_Tcells,
lab = rownames(P1_vs_PBMC_Tcells),
x = "avg_log2FC",
y = "p_val_adj",
selectLab = c('EPCAM', 'KIR3DL2', 'FOXM1', 'TWIST1', 'TNFSF9',
'CD80', 'FOS','PTPN6','NCR1','NCR2',
'PCLAF', 'KIR3DL1', 'IL4','ITGA6','CCL5',
'IL7R', 'TCF7', 'PTTG1', 'RRM2', 'MKI67', 'CD70',
'IL2RA', 'FCGR3A', 'GNLY', 'FOXP3', 'SELL', 'LEF1',
'CCL17', 'THY1', 'CD27', 'CD28', 'CD7',
# Key Sézary syndrome genes
'PRF1', 'GZMB', 'NCR1', 'NFATC3',
'KLRK1', 'LCK', 'KLRC1', 'KLRC2', 'TNF',
'KIR3DL1','KIR3DL3','KIR3DL4', 'IFNG', 'IFNGR1', 'CD244', 'FASLG'),
title = "P1_vs_PBMC_Tcells",
subtitle = "Sézary Syndrome Cell Lines",
xlab = bquote(~Log[2]~ 'fold change'),
pCutoff = 0.05,
FCcutoff = 1.5,
pointSize = 3.0,
labSize = 4.0,
labFace = 'bold',
boxedLabels = TRUE,
colAlpha = 0.5,
legendPosition = 'right',
legendLabSize = 10,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.5,
colConnectors = 'grey50',
arrowheads = FALSE,
max.overlaps = 30)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...
print(volcano2_P1_vs_PBMC_Tcells)
png("volcano2_P1_vs_PBMC_Tcells.png", width = 12, height = 10, units = "in", res = 300)
print(volcano2_P1_vs_PBMC_Tcells)
dev.off()
png
2
# Display top differentially expressed genes for each comparison
head(P1_vs_PBMC_Tcells)
NA
NA
# Re-assert the assay and identity class so this section can be run on its
# own (same settings as the P1 comparison).
DefaultAssay(All_samples_Merged) <- "SCT"
Idents(All_samples_Merged) <- "Harmony_snn_res.0.9"
# P2 vs PBMC-Tcells
# Pooled P2 clusters (ident.1) are tested against the same pooled PBMC T-cell
# reference clusters (ident.2) used for P1; per the Seurat documentation a
# positive avg_log2FC means higher expression in ident.1.
P2_vs_PBMC_Tcells <- FindMarkers(All_samples_Merged,
ident.1 = c(1, 2, 13), # P2 clusters
ident.2 = c(0, 5, 14, 24, 20), # PBMC T cells clusters
assay = "SCT")
# Gene symbols are exported as the row names of the CSV.
write.csv(P2_vs_PBMC_Tcells, "New_comparison_P2_vs_PBMC_Tcells.csv")
# Create volcano plot for P2 vs PBMC-Tcells
volcano_P2_vs_PBMC_Tcells <- EnhancedVolcano(P2_vs_PBMC_Tcells,
lab = rownames(P2_vs_PBMC_Tcells),
x = 'avg_log2FC',
y = 'p_val_adj',
title = 'P2_vs_PBMC_Tcells',
pCutoff = 0.05,
FCcutoff = 1.5,
pointSize = 1.5,
labSize = 4.0,
col = c('grey', 'darkgreen', 'blue', 'red'),
colAlpha = 0.5,
legendPosition = 'right',
legendLabSize = 10,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.5)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...
print(volcano_P2_vs_PBMC_Tcells)
png("volcano_P2_vs_PBMC_Tcells.png", width = 12, height = 10, units = "in", res = 300)
print(volcano_P2_vs_PBMC_Tcells)
dev.off()
png
2
volcano2_P2_vs_PBMC_Tcells <- EnhancedVolcano(P2_vs_PBMC_Tcells,
lab = rownames(P2_vs_PBMC_Tcells),
x = "avg_log2FC",
y = "p_val_adj",
selectLab = c('KIR3DL2','KIR3DL1','KIR3DL3','KIR3DL4', 'TWIST1', 'TNFSF9',
'FOS', 'TCF7','LEF1',
'CD86', 'VCAM1','CCL5',
'CD40', 'CD70',
'IL2RA', 'FCGR3A', 'GNLY', 'FOXP3', 'LEF1',
'CCL17', 'THY1', 'CD27', 'CD28', 'CD7','EPCAM','TOX','IL16','IL21',
# Key Sézary syndrome genes
'PRF1', 'GZMB',
'KLRK1', 'LCK', 'KLRC1', 'KLRC2',
'IFNG', 'IFNGR1', 'FASLG'),
title = "P2_vs_PBMC_Tcells",
subtitle = "Sézary Syndrome Cell Lines",
xlab = bquote(~Log[2]~ 'fold change'),
pCutoff = 0.05,
FCcutoff = 1.5,
pointSize = 3.0,
labSize = 4.0,
labFace = 'bold',
boxedLabels = TRUE,
colAlpha = 0.5,
legendPosition = 'right',
legendLabSize = 10,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.5,
colConnectors = 'grey50',
arrowheads = FALSE,
max.overlaps = 30)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...
print(volcano2_P2_vs_PBMC_Tcells)
png("volcano2_P2_vs_PBMC_Tcells.png", width = 12, height = 10, units = "in", res = 300)
print(volcano2_P2_vs_PBMC_Tcells)
dev.off()
png
2
# Display top differentially expressed genes for each comparison
head(P2_vs_PBMC_Tcells)
NA
NA
# Re-assert the assay and identity class so this section can be run on its
# own (same settings as the P1 and P2 comparisons).
DefaultAssay(All_samples_Merged) <- "SCT"
Idents(All_samples_Merged) <- "Harmony_snn_res.0.9"
# P3 vs PBMC-Tcells
# Pooled P3 clusters (ident.1) are tested against the same pooled PBMC T-cell
# reference clusters (ident.2) used for P1 and P2; per the Seurat
# documentation a positive avg_log2FC means higher expression in ident.1.
P3_vs_PBMC_Tcells <- FindMarkers(All_samples_Merged,
ident.1 = c(4, 7, 9, 6, 16, 19), # P3 clusters
ident.2 = c(0, 5, 14, 24, 20), # PBMC T cells clusters
assay = "SCT")
# NOTE(review): this file name has no "New_" prefix, unlike the P1 and P2
# exports -- confirm whether that is intentional.
write.csv(P3_vs_PBMC_Tcells, "comparison_P3_vs_PBMC_Tcells.csv")
# Create volcano plot for P3 vs PBMC-Tcells
volcano_P3_vs_PBMC_Tcells <- EnhancedVolcano(P3_vs_PBMC_Tcells,
lab = rownames(P3_vs_PBMC_Tcells),
x = 'avg_log2FC',
y = 'p_val_adj',
title = 'P3_vs_PBMC_Tcells',
pCutoff = 0.05,
FCcutoff = 1.5,
pointSize = 1.5,
labSize = 4.0,
col = c('grey', 'darkgreen', 'blue', 'red'),
colAlpha = 0.5,
legendPosition = 'right',
legendLabSize = 10,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.5)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...
print(volcano_P3_vs_PBMC_Tcells)
png("volcano_P3_vs_PBMC_Tcells.png", width = 12, height = 10, units = "in", res = 300)
print(volcano_P3_vs_PBMC_Tcells)
dev.off()
png
2
volcano2_P3_vs_PBMC_Tcells <- EnhancedVolcano(P3_vs_PBMC_Tcells,
lab = rownames(P3_vs_PBMC_Tcells),
x = "avg_log2FC",
y = "p_val_adj",
selectLab = c('KIR3DL2','KIR3DL1','KIR3DL3','KIR3DL4', 'TWIST1', 'TNFSF9',
'VCAM1','CCL5','CCL23','IL13','IL19', 'TIGIT','JUN','TP53','CD40','CCR10',
'CD40', 'KIT','CD52','CD44','RORC','TIFA',
'FOXP3',
'CCL17', 'THY1', 'CD28', 'CD7','EPCAM','IL16',
# Key Sézary syndrome genes
'KLRK1', 'KLRC1', 'KLRC2',
'IFNG', 'IFNGR1', 'FASLG'),
title = "P3_vs_PBMC_Tcells",
subtitle = "Sézary Syndrome Cell Lines",
xlab = bquote(~Log[2]~ 'fold change'),
pCutoff = 0.05,
FCcutoff = 1.5,
pointSize = 3.0,
labSize = 4.0,
labFace = 'bold',
boxedLabels = TRUE,
colAlpha = 0.5,
legendPosition = 'right',
legendLabSize = 10,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.5,
colConnectors = 'grey50',
arrowheads = FALSE,
max.overlaps = 30)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...
print(volcano2_P3_vs_PBMC_Tcells)
png("volcano2_P3_vs_PBMC_Tcells.png", width = 12, height = 10, units = "in", res = 300)
print(volcano2_P3_vs_PBMC_Tcells)
dev.off()
png
2
print(volcano_P1_vs_PBMC_Tcells)
print(volcano_P2_vs_PBMC_Tcells)
print(volcano_P3_vs_PBMC_Tcells)
print(volcano2_P1_vs_PBMC_Tcells)
print(volcano2_P2_vs_PBMC_Tcells)
print(volcano2_P3_vs_PBMC_Tcells)
# Display top differentially expressed genes for each comparison
head(P1_vs_PBMC_Tcells)
head(P2_vs_PBMC_Tcells)
head(P3_vs_PBMC_Tcells)
NA
NA
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
#' GO Biological Process over-representation analysis with a saved dot plot
#'
#' Runs `enrichGO()` on gene symbols (BH adjustment, q-value cutoff 0.05),
#' draws a dot plot of the top 10 terms on the active device and writes the
#' same plot to `GO_enrichment_<title>.png` (spaces in `title` become
#' underscores) in the working directory.
#'
#' @param gene_list Character vector of gene symbols to test.
#' @param gene_universe Character vector of gene symbols used as background.
#' @param title Label used in the plot title, the warnings and the file name.
#' @return The `enrichGO()` result, or `NULL` (with a warning) when there are
#'   no input genes or no term passes the significance cutoffs.
perform_go_enrichment <- function(gene_list, gene_universe, title) {
  # Nothing to test: bail out before touching the annotation database.
  gene_list <- unique(gene_list[!is.na(gene_list)])
  if (length(gene_list) == 0) {
    warning(paste("No input genes supplied for", title))
    return(NULL)
  }

  # Opens the PNG device and guarantees it is closed even if drawing fails.
  save_png <- function(plot_obj, path) {
    png(path, width = 12, height = 8, units = "in", res = 300)
    on.exit(dev.off(), add = TRUE)
    print(plot_obj)
    invisible(path)
  }

  ego <- enrichGO(
    gene = gene_list,
    universe = gene_universe,
    OrgDb = org.Hs.eg.db,
    keyType = "SYMBOL",
    ont = "BP",
    pAdjustMethod = "BH",
    qvalueCutoff = 0.05,
    readable = TRUE
  )

  # enrichGO() may return NULL when no gene can be mapped, and the raw
  # `@result` slot can hold terms that did not pass the cutoffs, so the
  # significance-filtered as.data.frame() view is what gets checked.
  if (is.null(ego) || nrow(as.data.frame(ego)) == 0) {
    warning(paste("No enriched GO terms found for", title))
    return(NULL)
  }

  p <- dotplot(ego, showCategory = 10, title = paste("GO -", title)) +
    theme(axis.text.y = element_text(size = 8))
  print(p)
  save_png(p, paste0("GO_enrichment_", gsub(" ", "_", title), ".png"))

  ego
}
#' KEGG pathway over-representation analysis with a saved dot plot
#'
#' Converts gene symbols to Entrez IDs with `bitr()`, prints the mapping
#' counts, runs `enrichKEGG()` for human (`hsa`, BH adjustment, p-value cutoff
#' 0.05), draws a dot plot of the top 10 pathways on the active device and
#' writes the same plot to `KEGG_enrichment_<title>.png` (spaces in `title`
#' become underscores) in the working directory.
#'
#' @param gene_list Character vector of gene symbols to test.
#' @param gene_universe Character vector of gene symbols used as background.
#' @param title Label used in the plot title, the warnings and the file name.
#' @return The `enrichKEGG()` result, or `NULL` (with a warning) when there
#'   are no input genes, nothing maps to Entrez IDs, no pathway passes the
#'   significance cutoffs, or any step raises an error.
perform_kegg_enrichment <- function(gene_list, gene_universe, title) {
  # Nothing to test: bail out before touching the annotation database.
  gene_list <- unique(gene_list[!is.na(gene_list)])
  if (length(gene_list) == 0) {
    warning(paste("No input genes supplied for", title))
    return(NULL)
  }

  # Opens the PNG device and guarantees it is closed even if drawing fails.
  save_png <- function(plot_obj, path) {
    png(path, width = 12, height = 8, units = "in", res = 300)
    on.exit(dev.off(), add = TRUE)
    print(plot_obj)
    invisible(path)
  }

  # The ID conversion lives inside the tryCatch as well: a lookup failure
  # (e.g. no valid keys at all) then becomes a warning + NULL like every other
  # failure here, instead of aborting the calling script.
  tryCatch({
    # Convert gene symbols to Entrez IDs
    entrez_ids <- bitr(gene_list, fromType = "SYMBOL", toType = "ENTREZID",
                       OrgDb = org.Hs.eg.db)
    universe_entrez <- bitr(gene_universe, fromType = "SYMBOL",
                            toType = "ENTREZID", OrgDb = org.Hs.eg.db)

    print(paste("Number of input genes:", length(gene_list)))
    print(paste("Number of input genes mapped to Entrez IDs:",
                nrow(entrez_ids)))
    print(paste("Number of universe genes:", length(gene_universe)))
    print(paste("Number of universe genes mapped to Entrez IDs:",
                nrow(universe_entrez)))

    if (nrow(entrez_ids) == 0) {
      warning(paste("No genes could be mapped for", title))
      return(NULL)
    }

    # A symbol can map to several Entrez IDs (1:many); de-duplicate so the
    # same ID is not passed to the test more than once.
    ekegg <- enrichKEGG(
      gene = unique(entrez_ids$ENTREZID),
      universe = unique(universe_entrez$ENTREZID),
      organism = "hsa",
      keyType = "kegg",
      pvalueCutoff = 0.05,
      pAdjustMethod = "BH"
    )

    # enrichKEGG() may return NULL, and the raw `@result` slot can hold
    # pathways that did not pass the cutoffs, so the significance-filtered
    # as.data.frame() view is what gets checked.
    if (is.null(ekegg) || nrow(as.data.frame(ekegg)) == 0) {
      warning(paste("No enriched KEGG pathways found for", title))
      return(NULL)
    }

    p <- dotplot(ekegg, showCategory = 10, title = paste("KEGG -", title)) +
      theme(axis.text.y = element_text(size = 8))
    print(p)
    save_png(p, paste0("KEGG_enrichment_", gsub(" ", "_", title), ".png"))

    ekegg
  }, error = function(e) {
    warning(paste("Error in KEGG enrichment for", title, ":",
                  conditionMessage(e)))
    NULL
  })
}
# Background for every enrichment test below: the row names of the Seurat
# object (27417 entries according to the printed mapping counts).
gene_universe <- rownames(All_samples_Merged)
# P1_vs_PBMC_Tcells comparison
# Up: avg_log2FC > 2.5, down: avg_log2FC < -2.5, both with p_val_adj < 0.05.
upregulated_genes_P1_vs_PBMC_Tcells <- rownames(P1_vs_PBMC_Tcells[P1_vs_PBMC_Tcells$avg_log2FC > 2.5 & P1_vs_PBMC_Tcells$p_val_adj < 0.05, ])
downregulated_genes_P1_vs_PBMC_Tcells <- rownames(P1_vs_PBMC_Tcells[P1_vs_PBMC_Tcells$avg_log2FC < -2.5 & P1_vs_PBMC_Tcells$p_val_adj < 0.05, ])
# Each call draws a dot plot, saves it as a PNG and returns the enrichment
# result (or NULL when nothing is enriched).
go_up_P1_vs_PBMC_Tcells <- perform_go_enrichment(upregulated_genes_P1_vs_PBMC_Tcells, gene_universe, "Upregulated Genes in P1_vs_PBMC_Tcells")
go_down_P1_vs_PBMC_Tcells <- perform_go_enrichment(downregulated_genes_P1_vs_PBMC_Tcells, gene_universe, "Downregulated Genes in P1_vs_PBMC_Tcells")
kegg_up_P1_vs_PBMC_Tcells <- perform_kegg_enrichment(upregulated_genes_P1_vs_PBMC_Tcells, gene_universe, "Upregulated Genes in P1_vs_PBMC_Tcells")
'select()' returned 1:1 mapping between keys and columns
Avis : 9.94% of input gene IDs are fail to map...'select()' returned 1:many mapping between keys and columns
Avis : 28.75% of input gene IDs are fail to map...
[1] "Number of input genes: 2504"
[1] "Number of input genes mapped to Entrez IDs: 2255"
[1] "Number of universe genes: 27417"
[1] "Number of universe genes mapped to Entrez IDs: 19538"
kegg_down_P1_vs_PBMC_Tcells <- perform_kegg_enrichment(downregulated_genes_P1_vs_PBMC_Tcells, gene_universe, "Downregulated Genes in P1_vs_PBMC_Tcells")
'select()' returned 1:1 mapping between keys and columns
Avis : 11.33% of input gene IDs are fail to map...'select()' returned 1:many mapping between keys and columns
Avis : 28.75% of input gene IDs are fail to map...
[1] "Number of input genes: 618"
[1] "Number of input genes mapped to Entrez IDs: 548"
[1] "Number of universe genes: 27417"
[1] "Number of universe genes mapped to Entrez IDs: 19538"
# P2_vs_PBMC_Tcells comparison
# Up: avg_log2FC > 0.5 with p_val_adj < 0.001; down: avg_log2FC < -2.5 with
# p_val_adj < 0.05.
# NOTE(review): the up and down cutoffs are asymmetric here and differ from
# the P1 (+/-2.5) and P3 (+/-1.5) sections; the up list has 10314 genes in the
# printed output -- confirm that the 0.5 / 0.001 cutoffs are intentional.
upregulated_genes_P2_vs_PBMC_Tcells <- rownames(P2_vs_PBMC_Tcells[P2_vs_PBMC_Tcells$avg_log2FC > 0.5 & P2_vs_PBMC_Tcells$p_val_adj < 0.001, ])
downregulated_genes_P2_vs_PBMC_Tcells <- rownames(P2_vs_PBMC_Tcells[P2_vs_PBMC_Tcells$avg_log2FC < -2.5 & P2_vs_PBMC_Tcells$p_val_adj < 0.05, ])
# Each call draws a dot plot, saves it as a PNG and returns the enrichment
# result (or NULL when nothing is enriched).
go_up_P2_vs_PBMC_Tcells <- perform_go_enrichment(upregulated_genes_P2_vs_PBMC_Tcells, gene_universe, "Upregulated Genes in P2_vs_PBMC_Tcells")
go_down_P2_vs_PBMC_Tcells <- perform_go_enrichment(downregulated_genes_P2_vs_PBMC_Tcells, gene_universe, "Downregulated Genes in P2_vs_PBMC_Tcells")
kegg_up_P2_vs_PBMC_Tcells <- perform_kegg_enrichment(upregulated_genes_P2_vs_PBMC_Tcells, gene_universe, "Upregulated Genes in P2_vs_PBMC_Tcells")
'select()' returned 1:many mapping between keys and columns
Avis : 10.46% of input gene IDs are fail to map...'select()' returned 1:many mapping between keys and columns
Avis : 28.75% of input gene IDs are fail to map...
[1] "Number of input genes: 10314"
[1] "Number of input genes mapped to Entrez IDs: 9238"
[1] "Number of universe genes: 27417"
[1] "Number of universe genes mapped to Entrez IDs: 19538"
kegg_down_P2_vs_PBMC_Tcells <- perform_kegg_enrichment(downregulated_genes_P2_vs_PBMC_Tcells, gene_universe, "Downregulated Genes in P2_vs_PBMC_Tcells")
'select()' returned 1:1 mapping between keys and columns
Avis : 14.44% of input gene IDs are fail to map...'select()' returned 1:many mapping between keys and columns
Avis : 28.75% of input gene IDs are fail to map...
[1] "Number of input genes: 810"
[1] "Number of input genes mapped to Entrez IDs: 693"
[1] "Number of universe genes: 27417"
[1] "Number of universe genes mapped to Entrez IDs: 19538"
# P3_vs_PBMC_Tcells comparison
# Up: avg_log2FC > 1.5, down: avg_log2FC < -1.5, both with p_val_adj < 0.05.
upregulated_genes_P3_vs_PBMC_Tcells <- rownames(P3_vs_PBMC_Tcells[P3_vs_PBMC_Tcells$avg_log2FC > 1.5 & P3_vs_PBMC_Tcells$p_val_adj < 0.05, ])
downregulated_genes_P3_vs_PBMC_Tcells <- rownames(P3_vs_PBMC_Tcells[P3_vs_PBMC_Tcells$avg_log2FC < -1.5 & P3_vs_PBMC_Tcells$p_val_adj < 0.05, ])
# Each call draws a dot plot, saves it as a PNG and returns the enrichment
# result (or NULL when nothing is enriched).
go_up_P3_vs_PBMC_Tcells <- perform_go_enrichment(upregulated_genes_P3_vs_PBMC_Tcells, gene_universe, "Upregulated Genes in P3_vs_PBMC_Tcells")
go_down_P3_vs_PBMC_Tcells <- perform_go_enrichment(downregulated_genes_P3_vs_PBMC_Tcells, gene_universe, "Downregulated Genes in P3_vs_PBMC_Tcells")
kegg_up_P3_vs_PBMC_Tcells <- perform_kegg_enrichment(upregulated_genes_P3_vs_PBMC_Tcells, gene_universe, "Upregulated Genes in P3_vs_PBMC_Tcells")
'select()' returned 1:many mapping between keys and columns
Avis : 10.59% of input gene IDs are fail to map...'select()' returned 1:many mapping between keys and columns
Avis : 28.75% of input gene IDs are fail to map...
[1] "Number of input genes: 5913"
[1] "Number of input genes mapped to Entrez IDs: 5288"
[1] "Number of universe genes: 27417"
[1] "Number of universe genes mapped to Entrez IDs: 19538"
kegg_down_P3_vs_PBMC_Tcells <- perform_kegg_enrichment(downregulated_genes_P3_vs_PBMC_Tcells, gene_universe, "Downregulated Genes in P3_vs_PBMC_Tcells")
'select()' returned 1:1 mapping between keys and columns
Avis : 14.31% of input gene IDs are fail to map...'select()' returned 1:many mapping between keys and columns
Avis : 28.75% of input gene IDs are fail to map...
[1] "Number of input genes: 1041"
[1] "Number of input genes mapped to Entrez IDs: 892"
[1] "Number of universe genes: 27417"
[1] "Number of universe genes mapped to Entrez IDs: 19538"