Introduction

This analysis explores transcriptional and cellular changes associated with malaria infection using single-cell RNA sequencing data. Using a processed Seurat object, we examine immune cell composition, infection-associated clustering patterns, and differential gene expression, focusing on monocyte subsets. The aim is to generate publication-quality visualizations to support molecular surveillance and immunological interpretation in malaria-endemic settings.

# Load libraries
library(Seurat)
library(ggplot2)
library(patchwork)
library(dplyr)
library(RColorBrewer)
library(viridis)
library(ggrepel)
library(VennDiagram)
library(ComplexHeatmap)
library(circlize)

# Load processed Seurat object
seurat_combined <- readRDS("MoFu_unfiltered.rds")

# -----------------------------
# 1. Cell Type Annotation UMAP
# -----------------------------
cell_colors <- c(
  "B intermediate" = "#FF8C00", "B memory" = "#FF6500", "B naive" = "#FFD700",
  "CD14 Mono" = "#D2691E", "CD16 Mono" = "#CD853F",
  "CD4 Naive" = "#00BFFF", "CD4 TCM" = "#1E90FF", "CD4 TEM" = "#0066CC",
  "CD8 Naive" = "#00FF7F", "CD8 TCM" = "#00C957", "CD8 TEM" = "#008B45",
  "NK" = "#DA70D6", "pDC" = "#9932CC", "Platelet" = "#FF69B4"
)

p1 <- DimPlot(seurat_combined, reduction="azimuth_umap", group.by="predicted.celltype.l2",
              cols=cell_colors, pt.size=0.1, label=TRUE, label.size=3, repel=TRUE) +
  theme_classic() +
  theme(legend.position="right", legend.text=element_text(size=8),
        axis.text=element_text(size=10), axis.title=element_text(size=12, face="bold")) +
  labs(title="Cell Type Annotations")
p1

# -----------------------------
# 2. UMAP Split by Infection
# -----------------------------
p2 <- DimPlot(seurat_combined, reduction="azimuth_umap", split.by="Infection", group.by="Ethnicity",
              pt.size=0.1, ncol=2) +
  theme_classic() +
  theme(strip.background=element_blank(), strip.text=element_text(size=12, face="bold")) +
  labs(title="Infection Status Comparison")
p2

# -----------------------------
# 3. PCA of Monocytes
# -----------------------------
mono_subset <- subset(seurat_combined, subset=predicted.celltype.l2 %in% c("CD14 Mono", "CD16 Mono"))
mono_subset <- FindVariableFeatures(mono_subset, nfeatures=2000)
mono_subset <- ScaleData(mono_subset)
mono_subset <- RunPCA(mono_subset, features=VariableFeatures(mono_subset), npcs=30)

pca_data <- as.data.frame(Embeddings(mono_subset, "pca")[,1:2])
pca_data$CellType <- mono_subset$predicted.celltype.l2
pca_data$Infection <- mono_subset$Infection
pca_data$Ethnicity <- mono_subset$Ethnicity

p3 <- ggplot(pca_data, aes(x=PC_1, y=PC_2, color=Ethnicity, shape=CellType)) +
  geom_point(size=2, alpha=0.6) +
  scale_color_manual(values=c("Fulani"="#E74C3C", "Mossi"="#3498DB")) +
  theme_classic() +
  theme(legend.position="right", axis.text=element_text(size=10),
        axis.title=element_text(size=12, face="bold")) +
  labs(title="PCA: Monocyte Subsets",
       x=paste0("PC1: ", round(Stdev(mono_subset, "pca")[1]^2 / sum(Stdev(mono_subset, "pca")^2)*100,1), "% variance"),
       y=paste0("PC2: ", round(Stdev(mono_subset, "pca")[2]^2 / sum(Stdev(mono_subset, "pca")^2)*100,1), "% variance"))
p3

# -----------------------------
# 4. Differential Expression Heatmap
# -----------------------------
cd14_subset <- subset(seurat_combined, subset=predicted.celltype.l2=="CD14 Mono")
Idents(cd14_subset) <- "Infection"
cd14_degs <- FindMarkers(cd14_subset, ident.1="Inf", ident.2="NI")
top_genes <- rownames(cd14_degs)[1:20]
cd14_subset <- ScaleData(cd14_subset, features=top_genes)
expr_mat <- as.matrix(cd14_subset[["RNA"]]@scale.data[top_genes, ])
col_anno <- HeatmapAnnotation(Infection=cd14_subset$Infection,
                              col=list(Infection=c("NI"="blue","Inf"="red")))

ht <- Heatmap(expr_mat, name="Z-score", col=colorRamp2(c(-2,0,2), c("blue","white","red")),
              top_annotation=col_anno, show_column_names=FALSE, show_row_names=TRUE,
              row_names_gp=gpar(fontsize=6), cluster_columns=TRUE, cluster_rows=TRUE,
              column_title="CD14+ Monocytes: Infected vs Non-infected")
draw(ht)

# -----------------------------
# 5. Venn Diagram
# -----------------------------
cd16_subset <- subset(seurat_combined, subset=predicted.celltype.l2=="CD16 Mono")
Idents(cd16_subset) <- "Infection"
cd16_degs <- FindMarkers(cd16_subset, ident.1="Inf", ident.2="NI")

cd14_up <- rownames(subset(cd14_degs, avg_log2FC>0 & p_val_adj<0.05))
cd16_up <- rownames(subset(cd16_degs, avg_log2FC>0 & p_val_adj<0.05))

venn.plot <- venn.diagram(
  x=list("CD14+ Mono"=cd14_up, "CD16+ Mono"=cd16_up),
  filename=NULL, fill=c("#E74C3C","#3498DB"), alpha=0.5,
  cex=2, cat.cex=1.5, cat.pos=c(-20,20),
  main="Upregulated Genes in Infected Cells"
)
grid.draw(venn.plot)

# -----------------------------
# 6. Violin Plots for IFN Genes
# -----------------------------
ifn_genes <- c("IFNG","IFNB1","STAT1","STAT2","IRF1","IRF7","MX1","ISG15")
ifn_genes <- ifn_genes[ifn_genes %in% rownames(seurat_combined)]
mono_subset <- subset(seurat_combined, subset=predicted.celltype.l2 %in% c("CD14 Mono","CD16 Mono"))

plots <- lapply(ifn_genes, function(gene){
  VlnPlot(mono_subset, features=gene, split.by="Infection", group.by="predicted.celltype.l2",
          pt.size=0, cols=c("Infected"="#E74C3C","Non-infected"="#3498DB")) +
    theme(legend.position="none", axis.title.x=element_blank())
})
wrap_plots(plots, ncol=4)

# -----------------------------
# 7. Cell Count Bar Plot
# -----------------------------
cell_counts <- seurat_combined@meta.data %>%
  group_by(predicted.celltype.l2, Infection) %>%
  summarise(count=n(), .groups="drop")

ggplot(cell_counts, aes(x=predicted.celltype.l2, y=count, fill=Infection)) +
  geom_bar(stat="identity", position="dodge") +
  scale_fill_manual(values=c("Infected"="#E74C3C","Non-infected"="#3498DB")) +
  theme_classic() +
  theme(axis.text.x=element_text(angle=45,hjust=1,size=10),
        axis.text.y=element_text(size=10),
        axis.title=element_text(size=12,face="bold"),
        legend.position="top") +
  labs(x="Cell Type", y="Number of Cells", title="Cell Distribution by Infection Status")

# -----------------------------
# 8. Cell Proportion Comparison
# -----------------------------
cell_props <- seurat_combined@meta.data %>%
  group_by(Infection, predicted.celltype.l2) %>%
  summarise(count=n(), .groups="drop") %>%
  group_by(Infection) %>%
  mutate(proportion=count/sum(count)*100)

ggplot(cell_props, aes(x=predicted.celltype.l2, y=proportion, fill=Infection)) +
  geom_bar(stat="identity", position="dodge") +
  scale_fill_manual(values=c("Infected"="#E74C3C","Non-infected"="#3498DB")) +
  theme_classic() +
  theme(axis.text.x=element_text(angle=45,hjust=1,size=10),
        axis.text.y=element_text(size=10),
        axis.title=element_text(size=12,face="bold"),
        legend.position="top") +
  labs(x="Cell Type", y="Proportion (%)", title="Cell Type Proportions by Infection Status")