Here we show an example to use the est_frac() function in the MIND package to estimate cell type fractions using non-negative least squares (NNLS). We deconvolve the GTEx brain bulk data with the signature matrix used in our paper.
library(data.table)
gtex_sample = read.delim('https://storage.googleapis.com/gtex_analysis_v7/annotations/GTEx_v7_Annotations_SampleAttributesDS.txt', header = T, stringsAsFactors = F)
gtex_sample = gtex_sample[gtex_sample$SMTS == 'Brain',]
download.file('https://storage.googleapis.com/gtex_analysis_v7/rna_seq_data/GTEx_Analysis_2016-01-15_v7_RNASeQCv1.1.8_gene_reads.gct.gz', 'GTEx_Analysis_2016-01-15_v7_RNASeQCv1.1.8_gene_reads.gct.gz')
rnaseq_sample_id = fread('GTEx_Analysis_2016-01-15_v7_RNASeQCv1.1.8_gene_reads.gct.gz', header = T, skip = 2, nrows = 1)
gtex_sample = gtex_sample[gtex_sample$SAMPID %in% colnames(rnaseq_sample_id),]
# read count
gtex_count <- fread('GTEx_Analysis_2016-01-15_v7_RNASeQCv1.1.8_gene_reads.gct.gz', header = T, skip = 2, select = c('Description', gtex_sample$SAMPID))
gtex_count = as.data.frame(gtex_count)
gtex_count = gtex_count[!duplicated(gtex_count$Description), ]
rownames(gtex_count) = gtex_count$Description
gtex_count$Description = NULL
gtex_cpm = apply(gtex_count, 2, function(x) x/sum(x)*1e6)
dim(gtex_cpm)
## [1] 54271 1671
signature = openxlsx::read.xlsx('https://www.dropbox.com/s/vdhcousnkzbyoyp/Table%20S1%20Signature_matrix_Darmanis.xlsx?raw=1')
rownames(signature) = signature[,1]
signature[,1] = NULL
dim(signature)
## [1] 754 6
est_frac()sig = log2(as.matrix(signature) + 1)
bulk = log2(gtex_cpm[rownames(sig),] + 1)
# install MIND if not installed
if(!require('MIND', character.only = TRUE)) devtools::install_github('randel/MIND')
library(MIND)
cell_fraction = est_frac(sig, bulk)
## Astrocyte Endothelial Microglia Excitatory Inhibitory Oligo
## 0.20 0.17 0.06 0.22 0.26 0.11
We show the heatmap of the average cell fraction for each brain region.
region = substr(gtex_sample$SMTSD, 9, 100)
frac_region = apply(cell_fraction, 2, function(x) tapply(x, region, mean))
library(NMF)
aheatmap(frac_region, color = 'Blues')