title: “pseudo-bulk analysis” output: html_notebook

library(ExperimentHub)
library(Seurat)
library(DESeq2)
library(tidyverse)


# get data
eh <- ExperimentHub()
query(eh, "Kang")

sce <- eh[["EH2259"]]
seu.obj <- as.Seurat(sce, data = NULL)
View(seu.obj@meta.data)


# QC and filtering
# explore QC


# get mito percent
seu.obj$mitoPercent <- PercentageFeatureSet(seu.obj, pattern = '^MT-')
View(seu.obj@meta.data)

# filter
seu.filtered <- subset(seu.obj, subset = nFeature_originalexp > 200 & nFeature_originalexp < 2500 &
         nCount_originalexp > 800 & 
         mitoPercent < 5 &
         multiplets == 'singlet')

seu.obj
seu.filtered

# run Seurat's standard workflow steps
seu.filtered <- NormalizeData(seu.filtered)
seu.filtered <- FindVariableFeatures(seu.filtered)
seu.filtered <- ScaleData(seu.filtered)
seu.filtered <- RunPCA(seu.filtered)
ElbowPlot(seu.filtered)
seu.filtered <- RunUMAP(seu.filtered, dims = 1:20)

# visualize 
cell_plot <- DimPlot(seu.filtered, reduction = 'umap', group.by = 'cell', label = TRUE)
cond_plot <- DimPlot(seu.filtered, reduction = 'umap', group.by = 'stim')

cell_plot|cond_plot

# pseudo-bulk workflow -----------------
# Acquiring necessary metrics for aggregation across cells in a sample
# 1. counts matrix - sample level
# counts aggregate to sample level

View(seu.filtered@meta.data)
seu.filtered$samples <- paste0(seu.filtered$stim, seu.filtered$ind)

DefaultAssay(seu.filtered)

cts <- AggregateExpression(seu.filtered, 
                    group.by = c("cell", "samples"),
                    assays = 'originalexp',
                    slot = "counts",
                    return.seurat = FALSE)

cts <- cts$originalexp

# transpose
cts.t <- t(cts)


# convert to data.frame
cts.t <- as.data.frame(cts.t)

# get values where to split
splitRows <- gsub('_.*', '', rownames(cts.t))


# split data.frame
cts.split <- split.data.frame(cts.t,
                 f = factor(splitRows))

# fix colnames and transpose

cts.split.modified <- lapply(cts.split, function(x){
  rownames(x) <- gsub('.*_(.*)', '\\1', rownames(x))
  t(x)
  
})

#gsub('.*_(.*)', '\\1', 'B cells_ctrl101')



# Let's run DE analysis with B cells
# 1. Get counts matrix
counts_bcell <- cts.split.modified$`B cells`


# 2. generate sample level metadata
colData <- data.frame(samples = colnames(counts_bcell))

colData <- colData %>%
  mutate(condition = ifelse(grepl('stim', samples), 'Stimulated', 'Control')) %>%
  column_to_rownames(var = 'samples')

# get more information from metadata




# perform DESeq2 --------
# Create DESeq2 object   
dds <- DESeqDataSetFromMatrix(countData = counts_bcell,
                       colData = colData,
                       design = ~ condition)

# filter
keep <- rowSums(counts(dds)) >=10
dds <- dds[keep,]

# run DESeq2
dds <- DESeq(dds)



# Check the coefficients for the comparison
resultsNames(dds)

# Generate results object
res <- results(dds, name = "condition_Stimulated_vs_Control")
res
LS0tCm91dHB1dDoKICBodG1sX25vdGVib29rOiBkZWZhdWx0CiAgaHRtbF9kb2N1bWVudDogZGVmYXVsdAotLS0KLS0tCnRpdGxlOiAicHNldWRvLWJ1bGsgYW5hbHlzaXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawoKYGBge3J9CmxpYnJhcnkoRXhwZXJpbWVudEh1YikKbGlicmFyeShTZXVyYXQpCmxpYnJhcnkoREVTZXEyKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKCgojIGdldCBkYXRhCmVoIDwtIEV4cGVyaW1lbnRIdWIoKQpxdWVyeShlaCwgIkthbmciKQoKc2NlIDwtIGVoW1siRUgyMjU5Il1dCnNldS5vYmogPC0gYXMuU2V1cmF0KHNjZSwgZGF0YSA9IE5VTEwpClZpZXcoc2V1Lm9iakBtZXRhLmRhdGEpCgoKIyBRQyBhbmQgZmlsdGVyaW5nCiMgZXhwbG9yZSBRQwoKCiMgZ2V0IG1pdG8gcGVyY2VudApzZXUub2JqJG1pdG9QZXJjZW50IDwtIFBlcmNlbnRhZ2VGZWF0dXJlU2V0KHNldS5vYmosIHBhdHRlcm4gPSAnXk1ULScpClZpZXcoc2V1Lm9iakBtZXRhLmRhdGEpCgojIGZpbHRlcgpzZXUuZmlsdGVyZWQgPC0gc3Vic2V0KHNldS5vYmosIHN1YnNldCA9IG5GZWF0dXJlX29yaWdpbmFsZXhwID4gMjAwICYgbkZlYXR1cmVfb3JpZ2luYWxleHAgPCAyNTAwICYKICAgICAgICAgbkNvdW50X29yaWdpbmFsZXhwID4gODAwICYgCiAgICAgICAgIG1pdG9QZXJjZW50IDwgNSAmCiAgICAgICAgIG11bHRpcGxldHMgPT0gJ3NpbmdsZXQnKQoKc2V1Lm9iagpzZXUuZmlsdGVyZWQKCiMgcnVuIFNldXJhdCdzIHN0YW5kYXJkIHdvcmtmbG93IHN0ZXBzCnNldS5maWx0ZXJlZCA8LSBOb3JtYWxpemVEYXRhKHNldS5maWx0ZXJlZCkKc2V1LmZpbHRlcmVkIDwtIEZpbmRWYXJpYWJsZUZlYXR1cmVzKHNldS5maWx0ZXJlZCkKc2V1LmZpbHRlcmVkIDwtIFNjYWxlRGF0YShzZXUuZmlsdGVyZWQpCnNldS5maWx0ZXJlZCA8LSBSdW5QQ0Eoc2V1LmZpbHRlcmVkKQpFbGJvd1Bsb3Qoc2V1LmZpbHRlcmVkKQpzZXUuZmlsdGVyZWQgPC0gUnVuVU1BUChzZXUuZmlsdGVyZWQsIGRpbXMgPSAxOjIwKQoKIyB2aXN1YWxpemUgCmNlbGxfcGxvdCA8LSBEaW1QbG90KHNldS5maWx0ZXJlZCwgcmVkdWN0aW9uID0gJ3VtYXAnLCBncm91cC5ieSA9ICdjZWxsJywgbGFiZWwgPSBUUlVFKQpjb25kX3Bsb3QgPC0gRGltUGxvdChzZXUuZmlsdGVyZWQsIHJlZHVjdGlvbiA9ICd1bWFwJywgZ3JvdXAuYnkgPSAnc3RpbScpCgpjZWxsX3Bsb3R8Y29uZF9wbG90CgojIHBzZXVkby1idWxrIHdvcmtmbG93IC0tLS0tLS0tLS0tLS0tLS0tCiMgQWNxdWlyaW5nIG5lY2Vzc2FyeSBtZXRyaWNzIGZvciBhZ2dyZWdhdGlvbiBhY3Jvc3MgY2VsbHMgaW4gYSBzYW1wbGUKIyAxLiBjb3VudHMgbWF0cml4IC0gc2FtcGxlIGxldmVsCiMgY291bnRzIGFnZ3JlZ2F0ZSB0byBzYW1wbGUgbGV2ZWwKClZpZXcoc2V1LmZpbHRlcmVkQG1ldGEuZGF0YSkKc2V1LmZpbHRlcmVkJHNhbXBsZXMgPC0gcGFzdGUwKHNldS5maWx0ZXJlZCRzdGltLCBzZXUuZmlsdGVyZWQkaW5kKQoKRGVmYXVsdEFzc2F5KHNldS5maWx0ZXJlZCkKCmN0cyA8LSBBZ2dyZWdhdGVFeHByZXNzaW9uKHNldS5maWx0ZXJlZCwgCiAgICAgICAgICAgICAgICAgICAgZ3JvdXAuYnkgPSBjKCJjZWxsIiwgInNhbXBsZXMiKSwKICAgICAgICAgICAgICAgICAgICBhc3NheXMgPSAnb3JpZ2luYWxleHAnLAogICAgICAgICAgICAgICAgICAgIHNsb3QgPSAiY291bnRzIiwKICAgICAgICAgICAgICAgICAgICByZXR1cm4uc2V1cmF0ID0gRkFMU0UpCgpjdHMgPC0gY3RzJG9yaWdpbmFsZXhwCgojIHRyYW5zcG9zZQpjdHMudCA8LSB0KGN0cykKCgojIGNvbnZlcnQgdG8gZGF0YS5mcmFtZQpjdHMudCA8LSBhcy5kYXRhLmZyYW1lKGN0cy50KQoKIyBnZXQgdmFsdWVzIHdoZXJlIHRvIHNwbGl0CnNwbGl0Um93cyA8LSBnc3ViKCdfLionLCAnJywgcm93bmFtZXMoY3RzLnQpKQoKCiMgc3BsaXQgZGF0YS5mcmFtZQpjdHMuc3BsaXQgPC0gc3BsaXQuZGF0YS5mcmFtZShjdHMudCwKICAgICAgICAgICAgICAgICBmID0gZmFjdG9yKHNwbGl0Um93cykpCgojIGZpeCBjb2xuYW1lcyBhbmQgdHJhbnNwb3NlCgpjdHMuc3BsaXQubW9kaWZpZWQgPC0gbGFwcGx5KGN0cy5zcGxpdCwgZnVuY3Rpb24oeCl7CiAgcm93bmFtZXMoeCkgPC0gZ3N1YignLipfKC4qKScsICdcXDEnLCByb3duYW1lcyh4KSkKICB0KHgpCiAgCn0pCgojZ3N1YignLipfKC4qKScsICdcXDEnLCAnQiBjZWxsc19jdHJsMTAxJykKCgoKIyBMZXQncyBydW4gREUgYW5hbHlzaXMgd2l0aCBCIGNlbGxzCiMgMS4gR2V0IGNvdW50cyBtYXRyaXgKY291bnRzX2JjZWxsIDwtIGN0cy5zcGxpdC5tb2RpZmllZCRgQiBjZWxsc2AKCgojIDIuIGdlbmVyYXRlIHNhbXBsZSBsZXZlbCBtZXRhZGF0YQpjb2xEYXRhIDwtIGRhdGEuZnJhbWUoc2FtcGxlcyA9IGNvbG5hbWVzKGNvdW50c19iY2VsbCkpCgpjb2xEYXRhIDwtIGNvbERhdGEgJT4lCiAgbXV0YXRlKGNvbmRpdGlvbiA9IGlmZWxzZShncmVwbCgnc3RpbScsIHNhbXBsZXMpLCAnU3RpbXVsYXRlZCcsICdDb250cm9sJykpICU+JQogIGNvbHVtbl90b19yb3duYW1lcyh2YXIgPSAnc2FtcGxlcycpCgojIGdldCBtb3JlIGluZm9ybWF0aW9uIGZyb20gbWV0YWRhdGEKCgoKCiMgcGVyZm9ybSBERVNlcTIgLS0tLS0tLS0KIyBDcmVhdGUgREVTZXEyIG9iamVjdCAgIApkZHMgPC0gREVTZXFEYXRhU2V0RnJvbU1hdHJpeChjb3VudERhdGEgPSBjb3VudHNfYmNlbGwsCiAgICAgICAgICAgICAgICAgICAgICAgY29sRGF0YSA9IGNvbERhdGEsCiAgICAgICAgICAgICAgICAgICAgICAgZGVzaWduID0gfiBjb25kaXRpb24pCgojIGZpbHRlcgprZWVwIDwtIHJvd1N1bXMoY291bnRzKGRkcykpID49MTAKZGRzIDwtIGRkc1trZWVwLF0KCiMgcnVuIERFU2VxMgpkZHMgPC0gREVTZXEoZGRzKQoKCgojIENoZWNrIHRoZSBjb2VmZmljaWVudHMgZm9yIHRoZSBjb21wYXJpc29uCnJlc3VsdHNOYW1lcyhkZHMpCgojIEdlbmVyYXRlIHJlc3VsdHMgb2JqZWN0CnJlcyA8LSByZXN1bHRzKGRkcywgbmFtZSA9ICJjb25kaXRpb25fU3RpbXVsYXRlZF92c19Db250cm9sIikKcmVzCmBgYAoK