title: “pseudo-bulk analysis” output: html_notebook
library(ExperimentHub)
library(Seurat)
library(DESeq2)
library(tidyverse)
# get data
eh <- ExperimentHub()
query(eh, "Kang")
sce <- eh[["EH2259"]]
seu.obj <- as.Seurat(sce, data = NULL)
View(seu.obj@meta.data)
# QC and filtering
# explore QC
# get mito percent
seu.obj$mitoPercent <- PercentageFeatureSet(seu.obj, pattern = '^MT-')
View(seu.obj@meta.data)
# filter
seu.filtered <- subset(seu.obj, subset = nFeature_originalexp > 200 & nFeature_originalexp < 2500 &
nCount_originalexp > 800 &
mitoPercent < 5 &
multiplets == 'singlet')
seu.obj
seu.filtered
# run Seurat's standard workflow steps
seu.filtered <- NormalizeData(seu.filtered)
seu.filtered <- FindVariableFeatures(seu.filtered)
seu.filtered <- ScaleData(seu.filtered)
seu.filtered <- RunPCA(seu.filtered)
ElbowPlot(seu.filtered)
seu.filtered <- RunUMAP(seu.filtered, dims = 1:20)
# visualize
cell_plot <- DimPlot(seu.filtered, reduction = 'umap', group.by = 'cell', label = TRUE)
cond_plot <- DimPlot(seu.filtered, reduction = 'umap', group.by = 'stim')
cell_plot|cond_plot
# pseudo-bulk workflow -----------------
# Acquiring necessary metrics for aggregation across cells in a sample
# 1. counts matrix - sample level
# counts aggregate to sample level
View(seu.filtered@meta.data)
seu.filtered$samples <- paste0(seu.filtered$stim, seu.filtered$ind)
DefaultAssay(seu.filtered)
cts <- AggregateExpression(seu.filtered,
group.by = c("cell", "samples"),
assays = 'originalexp',
slot = "counts",
return.seurat = FALSE)
cts <- cts$originalexp
# transpose
cts.t <- t(cts)
# convert to data.frame
cts.t <- as.data.frame(cts.t)
# get values where to split
splitRows <- gsub('_.*', '', rownames(cts.t))
# split data.frame
cts.split <- split.data.frame(cts.t,
f = factor(splitRows))
# fix colnames and transpose
cts.split.modified <- lapply(cts.split, function(x){
rownames(x) <- gsub('.*_(.*)', '\\1', rownames(x))
t(x)
})
#gsub('.*_(.*)', '\\1', 'B cells_ctrl101')
# Let's run DE analysis with B cells
# 1. Get counts matrix
counts_bcell <- cts.split.modified$`B cells`
# 2. generate sample level metadata
colData <- data.frame(samples = colnames(counts_bcell))
colData <- colData %>%
mutate(condition = ifelse(grepl('stim', samples), 'Stimulated', 'Control')) %>%
column_to_rownames(var = 'samples')
# get more information from metadata
# perform DESeq2 --------
# Create DESeq2 object
dds <- DESeqDataSetFromMatrix(countData = counts_bcell,
colData = colData,
design = ~ condition)
# filter
keep <- rowSums(counts(dds)) >=10
dds <- dds[keep,]
# run DESeq2
dds <- DESeq(dds)
# Check the coefficients for the comparison
resultsNames(dds)
# Generate results object
res <- results(dds, name = "condition_Stimulated_vs_Control")
res
LS0tCm91dHB1dDoKICBodG1sX25vdGVib29rOiBkZWZhdWx0CiAgaHRtbF9kb2N1bWVudDogZGVmYXVsdAotLS0KLS0tCnRpdGxlOiAicHNldWRvLWJ1bGsgYW5hbHlzaXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawoKYGBge3J9CmxpYnJhcnkoRXhwZXJpbWVudEh1YikKbGlicmFyeShTZXVyYXQpCmxpYnJhcnkoREVTZXEyKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKCgojIGdldCBkYXRhCmVoIDwtIEV4cGVyaW1lbnRIdWIoKQpxdWVyeShlaCwgIkthbmciKQoKc2NlIDwtIGVoW1siRUgyMjU5Il1dCnNldS5vYmogPC0gYXMuU2V1cmF0KHNjZSwgZGF0YSA9IE5VTEwpClZpZXcoc2V1Lm9iakBtZXRhLmRhdGEpCgoKIyBRQyBhbmQgZmlsdGVyaW5nCiMgZXhwbG9yZSBRQwoKCiMgZ2V0IG1pdG8gcGVyY2VudApzZXUub2JqJG1pdG9QZXJjZW50IDwtIFBlcmNlbnRhZ2VGZWF0dXJlU2V0KHNldS5vYmosIHBhdHRlcm4gPSAnXk1ULScpClZpZXcoc2V1Lm9iakBtZXRhLmRhdGEpCgojIGZpbHRlcgpzZXUuZmlsdGVyZWQgPC0gc3Vic2V0KHNldS5vYmosIHN1YnNldCA9IG5GZWF0dXJlX29yaWdpbmFsZXhwID4gMjAwICYgbkZlYXR1cmVfb3JpZ2luYWxleHAgPCAyNTAwICYKICAgICAgICAgbkNvdW50X29yaWdpbmFsZXhwID4gODAwICYgCiAgICAgICAgIG1pdG9QZXJjZW50IDwgNSAmCiAgICAgICAgIG11bHRpcGxldHMgPT0gJ3NpbmdsZXQnKQoKc2V1Lm9iagpzZXUuZmlsdGVyZWQKCiMgcnVuIFNldXJhdCdzIHN0YW5kYXJkIHdvcmtmbG93IHN0ZXBzCnNldS5maWx0ZXJlZCA8LSBOb3JtYWxpemVEYXRhKHNldS5maWx0ZXJlZCkKc2V1LmZpbHRlcmVkIDwtIEZpbmRWYXJpYWJsZUZlYXR1cmVzKHNldS5maWx0ZXJlZCkKc2V1LmZpbHRlcmVkIDwtIFNjYWxlRGF0YShzZXUuZmlsdGVyZWQpCnNldS5maWx0ZXJlZCA8LSBSdW5QQ0Eoc2V1LmZpbHRlcmVkKQpFbGJvd1Bsb3Qoc2V1LmZpbHRlcmVkKQpzZXUuZmlsdGVyZWQgPC0gUnVuVU1BUChzZXUuZmlsdGVyZWQsIGRpbXMgPSAxOjIwKQoKIyB2aXN1YWxpemUgCmNlbGxfcGxvdCA8LSBEaW1QbG90KHNldS5maWx0ZXJlZCwgcmVkdWN0aW9uID0gJ3VtYXAnLCBncm91cC5ieSA9ICdjZWxsJywgbGFiZWwgPSBUUlVFKQpjb25kX3Bsb3QgPC0gRGltUGxvdChzZXUuZmlsdGVyZWQsIHJlZHVjdGlvbiA9ICd1bWFwJywgZ3JvdXAuYnkgPSAnc3RpbScpCgpjZWxsX3Bsb3R8Y29uZF9wbG90CgojIHBzZXVkby1idWxrIHdvcmtmbG93IC0tLS0tLS0tLS0tLS0tLS0tCiMgQWNxdWlyaW5nIG5lY2Vzc2FyeSBtZXRyaWNzIGZvciBhZ2dyZWdhdGlvbiBhY3Jvc3MgY2VsbHMgaW4gYSBzYW1wbGUKIyAxLiBjb3VudHMgbWF0cml4IC0gc2FtcGxlIGxldmVsCiMgY291bnRzIGFnZ3JlZ2F0ZSB0byBzYW1wbGUgbGV2ZWwKClZpZXcoc2V1LmZpbHRlcmVkQG1ldGEuZGF0YSkKc2V1LmZpbHRlcmVkJHNhbXBsZXMgPC0gcGFzdGUwKHNldS5maWx0ZXJlZCRzdGltLCBzZXUuZmlsdGVyZWQkaW5kKQoKRGVmYXVsdEFzc2F5KHNldS5maWx0ZXJlZCkKCmN0cyA8LSBBZ2dyZWdhdGVFeHByZXNzaW9uKHNldS5maWx0ZXJlZCwgCiAgICAgICAgICAgICAgICAgICAgZ3JvdXAuYnkgPSBjKCJjZWxsIiwgInNhbXBsZXMiKSwKICAgICAgICAgICAgICAgICAgICBhc3NheXMgPSAnb3JpZ2luYWxleHAnLAogICAgICAgICAgICAgICAgICAgIHNsb3QgPSAiY291bnRzIiwKICAgICAgICAgICAgICAgICAgICByZXR1cm4uc2V1cmF0ID0gRkFMU0UpCgpjdHMgPC0gY3RzJG9yaWdpbmFsZXhwCgojIHRyYW5zcG9zZQpjdHMudCA8LSB0KGN0cykKCgojIGNvbnZlcnQgdG8gZGF0YS5mcmFtZQpjdHMudCA8LSBhcy5kYXRhLmZyYW1lKGN0cy50KQoKIyBnZXQgdmFsdWVzIHdoZXJlIHRvIHNwbGl0CnNwbGl0Um93cyA8LSBnc3ViKCdfLionLCAnJywgcm93bmFtZXMoY3RzLnQpKQoKCiMgc3BsaXQgZGF0YS5mcmFtZQpjdHMuc3BsaXQgPC0gc3BsaXQuZGF0YS5mcmFtZShjdHMudCwKICAgICAgICAgICAgICAgICBmID0gZmFjdG9yKHNwbGl0Um93cykpCgojIGZpeCBjb2xuYW1lcyBhbmQgdHJhbnNwb3NlCgpjdHMuc3BsaXQubW9kaWZpZWQgPC0gbGFwcGx5KGN0cy5zcGxpdCwgZnVuY3Rpb24oeCl7CiAgcm93bmFtZXMoeCkgPC0gZ3N1YignLipfKC4qKScsICdcXDEnLCByb3duYW1lcyh4KSkKICB0KHgpCiAgCn0pCgojZ3N1YignLipfKC4qKScsICdcXDEnLCAnQiBjZWxsc19jdHJsMTAxJykKCgoKIyBMZXQncyBydW4gREUgYW5hbHlzaXMgd2l0aCBCIGNlbGxzCiMgMS4gR2V0IGNvdW50cyBtYXRyaXgKY291bnRzX2JjZWxsIDwtIGN0cy5zcGxpdC5tb2RpZmllZCRgQiBjZWxsc2AKCgojIDIuIGdlbmVyYXRlIHNhbXBsZSBsZXZlbCBtZXRhZGF0YQpjb2xEYXRhIDwtIGRhdGEuZnJhbWUoc2FtcGxlcyA9IGNvbG5hbWVzKGNvdW50c19iY2VsbCkpCgpjb2xEYXRhIDwtIGNvbERhdGEgJT4lCiAgbXV0YXRlKGNvbmRpdGlvbiA9IGlmZWxzZShncmVwbCgnc3RpbScsIHNhbXBsZXMpLCAnU3RpbXVsYXRlZCcsICdDb250cm9sJykpICU+JQogIGNvbHVtbl90b19yb3duYW1lcyh2YXIgPSAnc2FtcGxlcycpCgojIGdldCBtb3JlIGluZm9ybWF0aW9uIGZyb20gbWV0YWRhdGEKCgoKCiMgcGVyZm9ybSBERVNlcTIgLS0tLS0tLS0KIyBDcmVhdGUgREVTZXEyIG9iamVjdCAgIApkZHMgPC0gREVTZXFEYXRhU2V0RnJvbU1hdHJpeChjb3VudERhdGEgPSBjb3VudHNfYmNlbGwsCiAgICAgICAgICAgICAgICAgICAgICAgY29sRGF0YSA9IGNvbERhdGEsCiAgICAgICAgICAgICAgICAgICAgICAgZGVzaWduID0gfiBjb25kaXRpb24pCgojIGZpbHRlcgprZWVwIDwtIHJvd1N1bXMoY291bnRzKGRkcykpID49MTAKZGRzIDwtIGRkc1trZWVwLF0KCiMgcnVuIERFU2VxMgpkZHMgPC0gREVTZXEoZGRzKQoKCgojIENoZWNrIHRoZSBjb2VmZmljaWVudHMgZm9yIHRoZSBjb21wYXJpc29uCnJlc3VsdHNOYW1lcyhkZHMpCgojIEdlbmVyYXRlIHJlc3VsdHMgb2JqZWN0CnJlcyA8LSByZXN1bHRzKGRkcywgbmFtZSA9ICJjb25kaXRpb25fU3RpbXVsYXRlZF92c19Db250cm9sIikKcmVzCmBgYAoK