This guide covers MCP-counter, CIBERSORT, TIMER, and EPIC with complete solutions for common issues.
# Install required packages
if (!require("BiocManager")) install.packages("BiocManager")
if (!require("devtools")) install.packages("devtools")
# Core packages
BiocManager::install(c(
"immunedeconv",
"edgeR",
"EPIC",
"org.Hs.eg.db",
"biomaRt"
))
# GitHub packages
devtools::install_github("ebecht/MCPcounter", ref="master", subdir="Source")
devtools::install_github("dviraran/xCell")
# For TIMER (if available)
devtools::install_github("ebedford/immunedeconv")
prepare_expression_data <- function(count_matrix) {
# 1. Convert to matrix and clean
if (!is.matrix(count_matrix)) {
count_matrix <- as.matrix(count_matrix)
}
# 2. Remove NA and non-numeric values
count_matrix[is.na(count_matrix)] <- 0
count_matrix[!is.numeric(count_matrix)] <- 0
# 3. Handle duplicate genes (take mean)
if (any(duplicated(rownames(count_matrix)))) {
count_matrix <- aggregate(count_matrix,
by = list(rownames(count_matrix)),
FUN = mean)
rownames(count_matrix) <- count_matrix$Group.1
count_matrix$Group.1 <- NULL
count_matrix <- as.matrix(count_matrix)
}
# 4. Convert to TPM (required for most methods)
dge <- DGEList(counts = count_matrix)
dge <- calcNormFactors(dge)
tpm_matrix <- cpm(dge, log = FALSE)
# 5. Ensure proper gene symbols
rownames(tpm_matrix) <- gsub("\\..*", "", rownames(tpm_matrix))
return(tpm_matrix)
}
# Usage:
# tpm_matrix <- prepare_expression_data(your_count_matrix)
run_mcpcounter <- function(tpm_matrix) {
# Source MCPcounter directly
source("https://raw.githubusercontent.com/ebecht/MCPcounter/master/Source/R/MCPcounter.R")
# Get genesets
genes <- read.table("https://raw.githubusercontent.com/ebecht/MCPcounter/master/Source/genes.txt",
sep = "\t", stringsAsFactors = FALSE, header = TRUE, colClasses = "character",
check.names = FALSE)
# Run MCP-counter
results <- MCPcounter.estimate(
expression = tpm_matrix,
featuresType = "HUGO_symbols",
probesets = genes$probesets,
genes = genes$HUGO symbols
)
return(results)
}
# Alternative using immunedeconv wrapper
run_mcpcounter_wrapper <- function(tpm_matrix) {
result <- immunedeconv::deconvolute(tpm_matrix, "mcp_counter")
return(result)
}
# Solution A: Using immunedeconv with manual setup
setup_cibersort <- function(cibersort_path, signature_matrix_path) {
if (!file.exists(cibersort_path)) {
stop("CIBERSORT.R not found. Download from https://cibersort.stanford.edu/")
}
if (!file.exists(signature_matrix_path)) {
stop("Signature matrix not found. Typically LM22.txt for immune cells")
}
set_cibersort_binary(cibersort_path)
set_cibersort_mat(signature_matrix_path)
}
run_cibersort <- function(tpm_matrix) {
result <- immunedeconv::deconvolute(tpm_matrix, "cibersort")
return(result)
}
# Solution B: EPIC as CIBERSORT alternative
run_epic_as_cibersort_alternative <- function(tpm_matrix) {
library(EPIC)
result <- EPIC(bulk = tpm_matrix)
return(result$cellFractions)
}
run_timer <- function(tpm_matrix) {
# TIMER through immunedeconv
result <- immunedeconv::deconvolute(tpm_matrix, "timer")
# If that fails, try manual implementation
if (inherits(result, "try-error")) {
# TIMER requires specific cancer type mapping
# This is a simplified approach
timer_result <- immunedeconv::deconvolute_timer(
tpm_matrix,
indications = rep("brca", ncol(tpm_matrix)) # Adjust cancer type
)
return(timer_result)
}
return(result)
}
run_epic <- function(tpm_matrix) {
library(EPIC)
# Run EPIC with default reference
result <- EPIC(bulk = tpm_matrix)
# Return both cell fractions and other metrics
return(list(
cellFractions = result$cellFractions,
mRNA_cell = result$mRNA_cell,
fit_gof = result$fit.gof
))
}
run_comprehensive_deconvolution <- function(count_matrix,
methods = c("epic", "quantiseq", "mcp_counter", "xcell")) {
# Prepare data
tpm_matrix <- prepare_expression_data(count_matrix)
results <- list()
# Run each method with error handling
for (method in methods) {
cat("Running", method, "...\n")
tryCatch({
if (method == "epic") {
results[[method]] <- run_epic(tpm_matrix)
} else if (method == "mcp_counter") {
results[[method]] <- run_mcpcounter_wrapper(tpm_matrix)
} else if (method == "timer") {
results[[method]] <- run_timer(tpm_matrix)
} else {
results[[method]] <- immunedeconv::deconvolute(tpm_matrix, method)
}
cat("✓", method, "completed successfully\n")
}, error = function(e) {
cat("✗", method, "failed:", e$message, "\n")
results[[method]] <- NULL
})
}
return(results)
}
# Usage:
# deconv_results <- run_comprehensive_deconvolution(your_count_matrix)
visualize_deconvolution <- function(results) {
library(ggplot2)
library(tidyr)
library(dplyr)
library(patchwork)
plots <- list()
# Create plots for each method
for (method_name in names(results)) {
if (!is.null(results[[method_name]])) {
# Extract data based on method output structure
if (method_name == "epic") {
data <- as.data.frame(results[[method_name]]$cellFractions)
data$cell_type <- rownames(data)
} else if (is.matrix(results[[method_name]])) {
data <- as.data.frame(t(results[[method_name]]))
data$cell_type <- rownames(data)
} else {
data <- results[[method_name]]
}
# Convert to long format
data_long <- pivot_longer(data,
cols = -cell_type,
names_to = "sample",
values_to = "fraction")
# Create plot
p <- ggplot(data_long, aes(x = sample, y = fraction, fill = cell_type)) +
geom_bar(stat = "identity") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = paste("Deconvolution:", method_name),
x = "Sample", y = "Cell Fraction") +
scale_fill_viridis_d()
plots[[method_name]] <- p
}
}
# Arrange plots
if (length(plots) > 0) {
wrap_plots(plots, ncol = 2)
}
}
# Correlation analysis between methods
compare_methods <- function(results) {
library(corrplot)
# Extract common cell types across methods
common_data <- list()
for (method in names(results)) {
if (!is.null(results[[method]])) {
if (method == "epic") {
common_data[[method]] <- results[[method]]$cellFractions
} else if (is.matrix(results[[method]])) {
common_data[[method]] <- results[[method]]
}
}
}
# Create correlation matrix (simplified example)
cor_matrix <- cor(common_data[[1]], common_data[[2]])
corrplot(cor_matrix, method = "color")
}
# Complete example with mock data
run_example_workflow <- function() {
# Create mock data
set.seed(123)
mock_counts <- matrix(rnbinom(500*30, mu=100, size=1), nrow=500, ncol=30)
# Use real gene symbols
library(org.Hs.eg.db)
gene_symbols <- select(org.Hs.eg.db,
keys = paste0("ENSG0000", 1000:1499),
keytype = "ENSEMBL",
columns = "SYMBOL")$SYMBOL
gene_symbols <- ifelse(is.na(gene_symbols), paste0("GENE_", 1:500), gene_symbols)
rownames(mock_counts) <- gene_symbols
colnames(mock_counts) <- paste0("Sample_", 1:30)
# Run deconvolution
results <- run_comprehensive_deconvolution(
mock_counts,
methods = c("epic", "quantiseq", "mcp_counter", "xcell")
)
# Visualize
visualize_deconvolution(results)
# Save results
saveRDS(results, "deconvolution_results.rds")
return(results)
}
# Execute the workflow
# deconv_results <- run_example_workflow()
troubleshoot_deconvolution <- function() {
cat("Common issues and solutions:\n")
cat("1. CIBERSORT errors: Use EPIC or quanTIseq as alternatives\n")
cat("2. xCell errors: Ensure proper HUGO symbols and TPM normalization\n")
cat("3. TIMER errors: Specify correct cancer type indications\n")
cat("4. Memory issues: Subsample genes or use smaller signature matrices\n")
cat("5. Gene symbol issues: Use biomaRt for ID conversion\n")
# Gene ID conversion example
convert_gene_ids <- function(expression_matrix) {
library(biomaRt)
mart <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
genes <- rownames(expression_matrix)
gene_map <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"),
filters = "ensembl_gene_id",
values = genes,
mart = mart)
# Merge and aggregate
merged <- merge(data.frame(ensembl_gene_id = genes), gene_map, all.x = TRUE)
merged$hgnc_symbol <- ifelse(is.na(merged$hgnc_symbol), merged$ensembl_gene_id, merged$hgnc_symbol)
expression_matrix <- aggregate(expression_matrix,
by = list(merged$hgnc_symbol),
FUN = mean)
rownames(expression_matrix) <- expression_matrix$Group.1
expression_matrix$Group.1 <- NULL
return(as.matrix(expression_matrix))
}
}
save_deconvolution_results <- function(results, filename) {
saveRDS(results, file = filename)
cat("Results saved to", filename, "\n")
}
load_deconvolution_results <- function(filename) {
results <- readRDS(filename)
return(results)
}
# Export to CSV for sharing
export_to_csv <- function(results, prefix = "deconvolution") {
for (method in names(results)) {
if (!is.null(results[[method]])) {
if (method == "epic") {
write.csv(results[[method]]$cellFractions,
file = paste0(prefix, "_", method, ".csv"))
} else {
write.csv(results[[method]],
file = paste0(prefix, "_", method, ".csv"))
}
}
}
}
run_complete_deconvolution_workflow <- function(n_genes = 1000, n_samples = 50, n_cell_types = 6) {
# Step 1: Generate simulated data
cat("Generating simulated data...\n")
sim_data <- generate_simulated_data(n_genes, n_samples, n_cell_types)
# Step 2: Prepare data
cat("Preparing data...\n")
tpm_matrix <- prepare_data(sim_data$bulk_expression)
# Step 3: Run deconvolution methods
cat("Running deconvolution methods...\n")
deconvolution_results <- run_all_deconvolution_methods(tpm_matrix, sim_data$signature_matrix)
# Step 4: Evaluate results
cat("Evaluating results...\n")
evaluation_results <- data.frame()
for (method_name in names(deconvolution_results)) {
if (!is.null(deconvolution_results[[method_name]])) {
eval_df <- evaluate_deconvolution(
sim_data$true_proportions,
deconvolution_results[[method_name]],
method_name
)
evaluation_results <- rbind(evaluation_results, eval_df)
}
}
# Step 5: Create visualizations
cat("Creating visualizations...\n")
plots <- list()
for (method_name in names(deconvolution_results)) {
if (!is.null(deconvolution_results[[method_name]])) {
plots[[method_name]] <- plot_deconvolution_results(
sim_data$true_proportions,
deconvolution_results[[method_name]],
method_name
)
}
}
# Step 6: Return results
return(list(
sim_data = sim_data,
deconvolution_results = deconvolution_results,
evaluation_results = evaluation_results,
plots = plots
))
}
# Run the complete workflow
results <- run_complete_deconvolution_workflow(n_genes = 1000, n_samples = 50, n_cell_types = 6)
# Print evaluation results
print(results$evaluation_results)
# Display plots
if (length(results$plots) > 0) {
combined_plot <- wrap_plots(results$plots, ncol = 2)
print(combined_plot)
}