Comprehensive Guide to Cell Deconvolution with Multiple Methods

Lucas TRAN

10/09/2025

This guide covers MCP-counter, CIBERSORT, TIMER, and EPIC with complete solutions for common issues.

🏗️ Installation and Setup

# Install required packages
if (!require("BiocManager")) install.packages("BiocManager")
if (!require("devtools")) install.packages("devtools")

# Core packages
BiocManager::install(c(
  "immunedeconv",
  "edgeR",
  "EPIC",
  "org.Hs.eg.db",
  "biomaRt"
))

# GitHub packages
devtools::install_github("ebecht/MCPcounter", ref="master", subdir="Source")
devtools::install_github("dviraran/xCell")

# For TIMER (if available)
devtools::install_github("ebedford/immunedeconv")

📊 Data Preparation Template

prepare_expression_data <- function(count_matrix) {
  # 1. Convert to matrix and clean
  if (!is.matrix(count_matrix)) {
    count_matrix <- as.matrix(count_matrix)
  }
  
  # 2. Remove NA and non-numeric values
  count_matrix[is.na(count_matrix)] <- 0
  count_matrix[!is.numeric(count_matrix)] <- 0
  
  # 3. Handle duplicate genes (take mean)
  if (any(duplicated(rownames(count_matrix)))) {
    count_matrix <- aggregate(count_matrix, 
                             by = list(rownames(count_matrix)), 
                             FUN = mean)
    rownames(count_matrix) <- count_matrix$Group.1
    count_matrix$Group.1 <- NULL
    count_matrix <- as.matrix(count_matrix)
  }
  
  # 4. Convert to TPM (required for most methods)
  dge <- DGEList(counts = count_matrix)
  dge <- calcNormFactors(dge)
  tpm_matrix <- cpm(dge, log = FALSE)
  
  # 5. Ensure proper gene symbols
  rownames(tpm_matrix) <- gsub("\\..*", "", rownames(tpm_matrix))
  
  return(tpm_matrix)
}

# Usage:
# tpm_matrix <- prepare_expression_data(your_count_matrix)

🔧 Method-Specific Solutions

1. MCP-counter

run_mcpcounter <- function(tpm_matrix) {
  # Source MCPcounter directly
  source("https://raw.githubusercontent.com/ebecht/MCPcounter/master/Source/R/MCPcounter.R")
  
  # Get genesets
  genes <- read.table("https://raw.githubusercontent.com/ebecht/MCPcounter/master/Source/genes.txt", 
                     sep = "\t", stringsAsFactors = FALSE, header = TRUE, colClasses = "character", 
                     check.names = FALSE)
  
  # Run MCP-counter
  results <- MCPcounter.estimate(
    expression = tpm_matrix,
    featuresType = "HUGO_symbols",
    probesets = genes$probesets,
    genes = genes$HUGO symbols
  )
  
  return(results)
}

# Alternative using immunedeconv wrapper
run_mcpcounter_wrapper <- function(tpm_matrix) {
  result <- immunedeconv::deconvolute(tpm_matrix, "mcp_counter")
  return(result)
}

2. CIBERSORT Solutions

# Solution A: Using immunedeconv with manual setup
setup_cibersort <- function(cibersort_path, signature_matrix_path) {
  if (!file.exists(cibersort_path)) {
    stop("CIBERSORT.R not found. Download from https://cibersort.stanford.edu/")
  }
  if (!file.exists(signature_matrix_path)) {
    stop("Signature matrix not found. Typically LM22.txt for immune cells")
  }
  
  set_cibersort_binary(cibersort_path)
  set_cibersort_mat(signature_matrix_path)
}

run_cibersort <- function(tpm_matrix) {
  result <- immunedeconv::deconvolute(tpm_matrix, "cibersort")
  return(result)
}

# Solution B: EPIC as CIBERSORT alternative
run_epic_as_cibersort_alternative <- function(tpm_matrix) {
  library(EPIC)
  result <- EPIC(bulk = tpm_matrix)
  return(result$cellFractions)
}

3. TIMER

run_timer <- function(tpm_matrix) {
  # TIMER through immunedeconv
  result <- immunedeconv::deconvolute(tpm_matrix, "timer")
  
  # If that fails, try manual implementation
  if (inherits(result, "try-error")) {
    # TIMER requires specific cancer type mapping
    # This is a simplified approach
    timer_result <- immunedeconv::deconvolute_timer(
      tpm_matrix,
      indications = rep("brca", ncol(tpm_matrix))  # Adjust cancer type
    )
    return(timer_result)
  }
  
  return(result)
}

4. EPIC

run_epic <- function(tpm_matrix) {
  library(EPIC)
  
  # Run EPIC with default reference
  result <- EPIC(bulk = tpm_matrix)
  
  # Return both cell fractions and other metrics
  return(list(
    cellFractions = result$cellFractions,
    mRNA_cell = result$mRNA_cell,
    fit_gof = result$fit.gof
  ))
}

🎯 Integrated Deconvolution Pipeline

run_comprehensive_deconvolution <- function(count_matrix, 
                                          methods = c("epic", "quantiseq", "mcp_counter", "xcell")) {
  
  # Prepare data
  tpm_matrix <- prepare_expression_data(count_matrix)
  
  results <- list()
  
  # Run each method with error handling
  for (method in methods) {
    cat("Running", method, "...\n")
    
    tryCatch({
      if (method == "epic") {
        results[[method]] <- run_epic(tpm_matrix)
      } else if (method == "mcp_counter") {
        results[[method]] <- run_mcpcounter_wrapper(tpm_matrix)
      } else if (method == "timer") {
        results[[method]] <- run_timer(tpm_matrix)
      } else {
        results[[method]] <- immunedeconv::deconvolute(tpm_matrix, method)
      }
      cat("✓", method, "completed successfully\n")
    }, error = function(e) {
      cat("✗", method, "failed:", e$message, "\n")
      results[[method]] <- NULL
    })
  }
  
  return(results)
}

# Usage:
# deconv_results <- run_comprehensive_deconvolution(your_count_matrix)

📈 Visualization and Comparison

visualize_deconvolution <- function(results) {
  library(ggplot2)
  library(tidyr)
  library(dplyr)
  library(patchwork)
  
  plots <- list()
  
  # Create plots for each method
  for (method_name in names(results)) {
    if (!is.null(results[[method_name]])) {
      
      # Extract data based on method output structure
      if (method_name == "epic") {
        data <- as.data.frame(results[[method_name]]$cellFractions)
        data$cell_type <- rownames(data)
      } else if (is.matrix(results[[method_name]])) {
        data <- as.data.frame(t(results[[method_name]]))
        data$cell_type <- rownames(data)
      } else {
        data <- results[[method_name]]
      }
      
      # Convert to long format
      data_long <- pivot_longer(data, 
                               cols = -cell_type, 
                               names_to = "sample", 
                               values_to = "fraction")
      
      # Create plot
      p <- ggplot(data_long, aes(x = sample, y = fraction, fill = cell_type)) +
        geom_bar(stat = "identity") +
        theme_minimal() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
        labs(title = paste("Deconvolution:", method_name),
             x = "Sample", y = "Cell Fraction") +
        scale_fill_viridis_d()
      
      plots[[method_name]] <- p
    }
  }
  
  # Arrange plots
  if (length(plots) > 0) {
    wrap_plots(plots, ncol = 2)
  }
}

# Correlation analysis between methods
compare_methods <- function(results) {
  library(corrplot)
  
  # Extract common cell types across methods
  common_data <- list()
  
  for (method in names(results)) {
    if (!is.null(results[[method]])) {
      if (method == "epic") {
        common_data[[method]] <- results[[method]]$cellFractions
      } else if (is.matrix(results[[method]])) {
        common_data[[method]] <- results[[method]]
      }
    }
  }
  
  # Create correlation matrix (simplified example)
  cor_matrix <- cor(common_data[[1]], common_data[[2]])
  corrplot(cor_matrix, method = "color")
}

🚀 Complete Workflow Example

# Complete example with mock data
run_example_workflow <- function() {
  # Create mock data
  set.seed(123)
  mock_counts <- matrix(rnbinom(500*30, mu=100, size=1), nrow=500, ncol=30)
  
  # Use real gene symbols
  library(org.Hs.eg.db)
  gene_symbols <- select(org.Hs.eg.db, 
                        keys = paste0("ENSG0000", 1000:1499),
                        keytype = "ENSEMBL", 
                        columns = "SYMBOL")$SYMBOL
  gene_symbols <- ifelse(is.na(gene_symbols), paste0("GENE_", 1:500), gene_symbols)
  
  rownames(mock_counts) <- gene_symbols
  colnames(mock_counts) <- paste0("Sample_", 1:30)
  
  # Run deconvolution
  results <- run_comprehensive_deconvolution(
    mock_counts,
    methods = c("epic", "quantiseq", "mcp_counter", "xcell")
  )
  
  # Visualize
  visualize_deconvolution(results)
  
  # Save results
  saveRDS(results, "deconvolution_results.rds")
  
  return(results)
}

# Execute the workflow
# deconv_results <- run_example_workflow()

⚠️ Troubleshooting Guide

troubleshoot_deconvolution <- function() {
  cat("Common issues and solutions:\n")
  cat("1. CIBERSORT errors: Use EPIC or quanTIseq as alternatives\n")
  cat("2. xCell errors: Ensure proper HUGO symbols and TPM normalization\n")
  cat("3. TIMER errors: Specify correct cancer type indications\n")
  cat("4. Memory issues: Subsample genes or use smaller signature matrices\n")
  cat("5. Gene symbol issues: Use biomaRt for ID conversion\n")
  
  # Gene ID conversion example
  convert_gene_ids <- function(expression_matrix) {
    library(biomaRt)
    mart <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
    
    genes <- rownames(expression_matrix)
    gene_map <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"),
                     filters = "ensembl_gene_id",
                     values = genes,
                     mart = mart)
    
    # Merge and aggregate
    merged <- merge(data.frame(ensembl_gene_id = genes), gene_map, all.x = TRUE)
    merged$hgnc_symbol <- ifelse(is.na(merged$hgnc_symbol), merged$ensembl_gene_id, merged$hgnc_symbol)
    
    expression_matrix <- aggregate(expression_matrix, 
                                  by = list(merged$hgnc_symbol), 
                                  FUN = mean)
    rownames(expression_matrix) <- expression_matrix$Group.1
    expression_matrix$Group.1 <- NULL
    
    return(as.matrix(expression_matrix))
  }
}

💾 Saving and Loading Results

save_deconvolution_results <- function(results, filename) {
  saveRDS(results, file = filename)
  cat("Results saved to", filename, "\n")
}

load_deconvolution_results <- function(filename) {
  results <- readRDS(filename)
  return(results)
}

# Export to CSV for sharing
export_to_csv <- function(results, prefix = "deconvolution") {
  for (method in names(results)) {
    if (!is.null(results[[method]])) {
      if (method == "epic") {
        write.csv(results[[method]]$cellFractions, 
                 file = paste0(prefix, "_", method, ".csv"))
      } else {
        write.csv(results[[method]], 
                 file = paste0(prefix, "_", method, ".csv"))
      }
    }
  }
}

Complete workflow function

run_complete_deconvolution_workflow <- function(n_genes = 1000, n_samples = 50, n_cell_types = 6) {
  # Step 1: Generate simulated data
  cat("Generating simulated data...\n")
  sim_data <- generate_simulated_data(n_genes, n_samples, n_cell_types)
  
  # Step 2: Prepare data
  cat("Preparing data...\n")
  tpm_matrix <- prepare_data(sim_data$bulk_expression)
  
  # Step 3: Run deconvolution methods
  cat("Running deconvolution methods...\n")
  deconvolution_results <- run_all_deconvolution_methods(tpm_matrix, sim_data$signature_matrix)
  
  # Step 4: Evaluate results
  cat("Evaluating results...\n")
  evaluation_results <- data.frame()
  for (method_name in names(deconvolution_results)) {
    if (!is.null(deconvolution_results[[method_name]])) {
      eval_df <- evaluate_deconvolution(
        sim_data$true_proportions, 
        deconvolution_results[[method_name]], 
        method_name
      )
      evaluation_results <- rbind(evaluation_results, eval_df)
    }
  }
  
  # Step 5: Create visualizations
  cat("Creating visualizations...\n")
  plots <- list()
  for (method_name in names(deconvolution_results)) {
    if (!is.null(deconvolution_results[[method_name]])) {
      plots[[method_name]] <- plot_deconvolution_results(
        sim_data$true_proportions, 
        deconvolution_results[[method_name]], 
        method_name
      )
    }
  }
  
  # Step 6: Return results
  return(list(
    sim_data = sim_data,
    deconvolution_results = deconvolution_results,
    evaluation_results = evaluation_results,
    plots = plots
  ))
}

# Run the complete workflow
results <- run_complete_deconvolution_workflow(n_genes = 1000, n_samples = 50, n_cell_types = 6)

# Print evaluation results
print(results$evaluation_results)

# Display plots
if (length(results$plots) > 0) {
  combined_plot <- wrap_plots(results$plots, ncol = 2)
  print(combined_plot)
}