1 Functions

tpm <- function(counts, lengths) {
  rpk <- counts / lengths
  coef <- sum(rpk) / 1e6
  rpk/coef
}

rpk <- function(counts, lengths) {
  rpk <- counts / lengths
  rpk
}

2 Read data

acc_immune_data_part1 <- read.delim("./Data/raw_data/SN0276364",skip=1,header=T, 
                       sep="\t",stringsAsFactors=F,row.names=1,check.names = F)

acc_immune_data_part2 <- read.delim("./Data/raw_data/SN0276892",skip=1,header=T, 
                       sep="\t",stringsAsFactors=F,row.names=1,check.names = F)

3 Pre-process

#save before omitting
genes_names = acc_immune_data_part1$gene_name
genes_lengths = acc_immune_data_part1[,5]

#omit non relevant cols
acc_immune_data_part1=acc_immune_data_part1[,7:ncol(acc_immune_data_part1)]
acc_immune_data_part2=acc_immune_data_part2[,7:ncol(acc_immune_data_part2)]

#combine datasets
acc_immune_data = cbind(acc_immune_data_part1,acc_immune_data_part2)

#create genes names
genes_names=make.unique(genes_names) %>% replace_na('NA')
rownames(acc_immune_data) = genes_names

#omit non relevant genes
omitgenes= startsWith(rownames(acc_immune_data),"NA")
acc_immune_data=acc_immune_data[!omitgenes,]
genes_lengths = genes_lengths[!omitgenes] #update genes_lengths

#calcualte gene length and MT genes
mt_genes = startsWith(rownames(acc_immune_data),"MT-")| startsWith(rownames(acc_immune_data),"ERCC-")

#get colnames
cell.labels <- gsub(pattern = "/.*$",replacement = "",colnames(acc_immune_data))

#change colnames
colnames(acc_immune_data) <- cell.labels


acc_immune_counts <- CreateSeuratObject(counts = acc_immune_data, project = "acc_immune_counts", min.cells = 3, min.features = 1000)
Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')

4 QC

acc_immune_counts@meta.data[["percent.mt"]] <- PercentageFeatureSet(acc_immune_counts, pattern = "^MT-")
print_tab(plt = 
            FeatureScatter(acc_immune_counts, feature1 = "nCount_RNA", feature2 = "percent.mt") + 
            theme(legend.position="none", axis.text.x = element_text(size=8)) + 
              geom_point(color='darkblue')
  
            ,title = "MT percentages")

MT percentages

print_tab(plt = 
            VlnPlot(acc_immune_counts, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
,title = "violin plots")

violin plots

NA

acc_immune_data.rpk=apply(acc_immune_data[!mt_genes,], 2, function(x) rpk(x, genes_lengths[!mt_genes]))
acc_immune <- CreateSeuratObject(counts = acc_immune_data.rpk, project = "acc_immune", min.cells = 3, min.features = 1000)
Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
acc_immune = NormalizeData(object = acc_immune,scale.factor = 1e6) #create TPM
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
acc_immune@assays$RNA@data = log2(exp(1)) * acc_immune@assays$RNA@data #convert from e base to 2 base log
# new_genes = rownames(acc_immune_data) %in% rownames(acc_immune)
# acc_immune_data.tpm=apply(acc_immune_data[new_genes,], 2, function(x) tpm(x, genes_lengths[new_genes])) %>%as.data.frame()
# acc_immune_data.tpm = log2(acc_immune_data.tpm+1)

5 Filtering

nFeature_RNA_threshold = 1000
percent.mt_threshold = 40
print ("nFeature_RNA threshold = " %>% paste(nFeature_RNA_threshold))
[1] "nFeature_RNA threshold =  1000"
print ("percent.mt threshold = " %>% paste(percent.mt_threshold))
[1] "percent.mt threshold =  40"
acc_immune <- subset(acc_immune, subset = nFeature_RNA > nFeature_RNA_threshold & percent.mt < percent.mt_threshold)
acc_immune
An object of class Seurat 
22647 features across 621 samples within 1 assay 
Active assay: RNA (22647 features, 0 variable features)

6 PCA

# Identification of highly variable features
acc_immune <- FindVariableFeatures(acc_immune, selection.method = "vst", nfeatures = 15000) 
# Scaling the data
acc_immune <- ScaleData(acc_immune, vars.to.regress = c("percent.mt","nCount_RNA"))
# Perform linear dimensional reduction (PCA)
acc_immune <- RunPCA(acc_immune, features = VariableFeatures(object = acc_immune))
ElbowPlot(acc_immune, ndims = 50) # checking the dimensionality 

pc2use=1:10
clus_res=1
print("PCA dims = " %>% paste(max(pc2use)))
[1] "PCA dims =  10"

7 UMAP and clustering

acc_immune <- FindNeighbors(acc_immune, dims = pc2use,verbose = F)
acc_immune <- FindClusters(acc_immune, resolution = clus_res,verbose = F)

# Run non-linear dimensional reduction (UMAP)
acc_immune <- RunUMAP(acc_immune, dims = pc2use,verbose = F)
DimPlot(object = acc_immune, reduction = "umap", pt.size = 1, label = F)

8 UMAPS




#get metedata
plate = str_extract(colnames(acc_immune), "^.*-P[0-9]*")
patient.ident = str_extract(colnames(acc_immune), "ACC[0-9]*")
origin = str_extract(colnames(acc_immune), "LN")
origin = origin %>% replace_na('Primary')

acc_immune <- AddMetaData(object = acc_immune, metadata = as.factor(patient.ident), col.name = "patient.ident")
acc_immune <- AddMetaData(object = acc_immune, metadata = as.factor(plate), col.name = "plate")
acc_immune <- AddMetaData(object = acc_immune, metadata = as.factor(origin), col.name = "origin")

print_tab(DimPlot(acc_immune, reduction = "umap", label = F, pt.size = 1,group.by = "patient.ident") 
,title = "by patient")

by patient

print_tab(plt = 
            DimPlot(acc_immune, reduction = "umap", label = F, pt.size = 1,group.by = "plate") 
        ,title =  "by plate")

by plate

print_tab(plt =
            FeaturePlot(object = acc_immune, features = "PTPRC")
          ,
          title = "CD45")

CD45

print_tab(plt = 
            DimPlot(acc_immune, reduction = "umap", label = F, pt.size = 1,group.by = "origin") 
        ,title =  "by origin")

by origin

NA

LS0tCnRpdGxlOiAnYHIgcnN0dWRpb2FwaTo6Z2V0U291cmNlRWRpdG9yQ29udGV4dCgpJHBhdGggJT4lIGJhc2VuYW1lKCkgJT4lIGdzdWIocGF0dGVybiA9ICJcXC5SbWQiLHJlcGxhY2VtZW50ID0gIiIpYCcgCmF1dGhvcjogIkF2aXNoYWkgV2l6ZWwiCmRhdGU6ICdgciBTeXMudGltZSgpYCcKb3V0cHV0OiAKICBodG1sX25vdGVib29rOiAKICAgIGNvZGVfZm9sZGluZzogaGlkZQogICAgdG9jOiB5ZXMKICAgIHRvY19jb2xsYXBzZTogeWVzCiAgICB0b2NfZmxvYXQ6IAogICAgICBjb2xsYXBzZWQ6IEZBTFNFCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIHRvY19kZXB0aDogMQotLS0KCgojIEZ1bmN0aW9ucwoKYGBge3Igd2FybmluZz1GQUxTRX0KdHBtIDwtIGZ1bmN0aW9uKGNvdW50cywgbGVuZ3RocykgewogIHJwayA8LSBjb3VudHMgLyBsZW5ndGhzCiAgY29lZiA8LSBzdW0ocnBrKSAvIDFlNgogIHJway9jb2VmCn0KCnJwayA8LSBmdW5jdGlvbihjb3VudHMsIGxlbmd0aHMpIHsKICBycGsgPC0gY291bnRzIC8gbGVuZ3RocwogIHJwawp9CmBgYAoKIyBSZWFkIGRhdGEKYGBge3J9CmFjY19pbW11bmVfZGF0YV9wYXJ0MSA8LSByZWFkLmRlbGltKCIuL0RhdGEvcmF3X2RhdGEvU04wMjc2MzY0Iixza2lwPTEsaGVhZGVyPVQsIAogICAgICAgICAgICAgICAgICAgICAgIHNlcD0iXHQiLHN0cmluZ3NBc0ZhY3RvcnM9Rixyb3cubmFtZXM9MSxjaGVjay5uYW1lcyA9IEYpCgphY2NfaW1tdW5lX2RhdGFfcGFydDIgPC0gcmVhZC5kZWxpbSgiLi9EYXRhL3Jhd19kYXRhL1NOMDI3Njg5MiIsc2tpcD0xLGhlYWRlcj1ULCAKICAgICAgICAgICAgICAgICAgICAgICBzZXA9Ilx0IixzdHJpbmdzQXNGYWN0b3JzPUYscm93Lm5hbWVzPTEsY2hlY2submFtZXMgPSBGKQpgYGAKCiMgUHJlLXByb2Nlc3MKYGBge3J9CiNzYXZlIGJlZm9yZSBvbWl0dGluZwpnZW5lc19uYW1lcyA9IGFjY19pbW11bmVfZGF0YV9wYXJ0MSRnZW5lX25hbWUKZ2VuZXNfbGVuZ3RocyA9IGFjY19pbW11bmVfZGF0YV9wYXJ0MVssNV0KCiNvbWl0IG5vbiByZWxldmFudCBjb2xzCmFjY19pbW11bmVfZGF0YV9wYXJ0MT1hY2NfaW1tdW5lX2RhdGFfcGFydDFbLDc6bmNvbChhY2NfaW1tdW5lX2RhdGFfcGFydDEpXQphY2NfaW1tdW5lX2RhdGFfcGFydDI9YWNjX2ltbXVuZV9kYXRhX3BhcnQyWyw3Om5jb2woYWNjX2ltbXVuZV9kYXRhX3BhcnQyKV0KCiNjb21iaW5lIGRhdGFzZXRzCmFjY19pbW11bmVfZGF0YSA9IGNiaW5kKGFjY19pbW11bmVfZGF0YV9wYXJ0MSxhY2NfaW1tdW5lX2RhdGFfcGFydDIpCgojY3JlYXRlIGdlbmVzIG5hbWVzCmdlbmVzX25hbWVzPW1ha2UudW5pcXVlKGdlbmVzX25hbWVzKSAlPiUgcmVwbGFjZV9uYSgnTkEnKQpyb3duYW1lcyhhY2NfaW1tdW5lX2RhdGEpID0gZ2VuZXNfbmFtZXMKCiNvbWl0IG5vbiByZWxldmFudCBnZW5lcwpvbWl0Z2VuZXM9IHN0YXJ0c1dpdGgocm93bmFtZXMoYWNjX2ltbXVuZV9kYXRhKSwiTkEiKQphY2NfaW1tdW5lX2RhdGE9YWNjX2ltbXVuZV9kYXRhWyFvbWl0Z2VuZXMsXQpnZW5lc19sZW5ndGhzID0gZ2VuZXNfbGVuZ3Roc1shb21pdGdlbmVzXSAjdXBkYXRlIGdlbmVzX2xlbmd0aHMKCiNjYWxjdWFsdGUgZ2VuZSBsZW5ndGggYW5kIE1UIGdlbmVzCm10X2dlbmVzID0gc3RhcnRzV2l0aChyb3duYW1lcyhhY2NfaW1tdW5lX2RhdGEpLCJNVC0iKXwgc3RhcnRzV2l0aChyb3duYW1lcyhhY2NfaW1tdW5lX2RhdGEpLCJFUkNDLSIpCgojZ2V0IGNvbG5hbWVzCmNlbGwubGFiZWxzIDwtIGdzdWIocGF0dGVybiA9ICIvLiokIixyZXBsYWNlbWVudCA9ICIiLGNvbG5hbWVzKGFjY19pbW11bmVfZGF0YSkpCgojY2hhbmdlIGNvbG5hbWVzCmNvbG5hbWVzKGFjY19pbW11bmVfZGF0YSkgPC0gY2VsbC5sYWJlbHMKCgphY2NfaW1tdW5lX2NvdW50cyA8LSBDcmVhdGVTZXVyYXRPYmplY3QoY291bnRzID0gYWNjX2ltbXVuZV9kYXRhLCBwcm9qZWN0ID0gImFjY19pbW11bmVfY291bnRzIiwgbWluLmNlbGxzID0gMywgbWluLmZlYXR1cmVzID0gMTAwMCkKCmBgYAoKIyBRQyAgey50YWJzZXR9CmBgYHtyIGVjaG89VFJVRSwgcmVzdWx0cz0nYXNpcyd9CmFjY19pbW11bmVfY291bnRzQG1ldGEuZGF0YVtbInBlcmNlbnQubXQiXV0gPC0gUGVyY2VudGFnZUZlYXR1cmVTZXQoYWNjX2ltbXVuZV9jb3VudHMsIHBhdHRlcm4gPSAiXk1ULSIpCnByaW50X3RhYihwbHQgPSAKICAgICAgICAgICAgRmVhdHVyZVNjYXR0ZXIoYWNjX2ltbXVuZV9jb3VudHMsIGZlYXR1cmUxID0gIm5Db3VudF9STkEiLCBmZWF0dXJlMiA9ICJwZXJjZW50Lm10IikgKyAKICAgICAgICAgICAgdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIiwgYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoc2l6ZT04KSkgKyAKICAgICAgICAgICAgICBnZW9tX3BvaW50KGNvbG9yPSdkYXJrYmx1ZScpCiAgCiAgICAgICAgICAgICx0aXRsZSA9ICJNVCBwZXJjZW50YWdlcyIpCgpwcmludF90YWIocGx0ID0gCiAgICAgICAgICAgIFZsblBsb3QoYWNjX2ltbXVuZV9jb3VudHMsIGZlYXR1cmVzID0gYygibkZlYXR1cmVfUk5BIiwgIm5Db3VudF9STkEiLCAicGVyY2VudC5tdCIpLCBuY29sID0gMykKLHRpdGxlID0gInZpb2xpbiBwbG90cyIpCgpgYGAKCgpgYGB7cn0KYWNjX2ltbXVuZV9kYXRhLnJwaz1hcHBseShhY2NfaW1tdW5lX2RhdGFbIW10X2dlbmVzLF0sIDIsIGZ1bmN0aW9uKHgpIHJwayh4LCBnZW5lc19sZW5ndGhzWyFtdF9nZW5lc10pKQphY2NfaW1tdW5lIDwtIENyZWF0ZVNldXJhdE9iamVjdChjb3VudHMgPSBhY2NfaW1tdW5lX2RhdGEucnBrLCBwcm9qZWN0ID0gImFjY19pbW11bmUiLCBtaW4uY2VsbHMgPSAzLCBtaW4uZmVhdHVyZXMgPSAxMDAwKQpgYGAKYGBge3J9CmFjY19pbW11bmUgPSBOb3JtYWxpemVEYXRhKG9iamVjdCA9IGFjY19pbW11bmUsc2NhbGUuZmFjdG9yID0gMWU2KSAjY3JlYXRlIFRQTQphY2NfaW1tdW5lQGFzc2F5cyRSTkFAZGF0YSA9IGxvZzIoZXhwKDEpKSAqIGFjY19pbW11bmVAYXNzYXlzJFJOQUBkYXRhICNjb252ZXJ0IGZyb20gZSBiYXNlIHRvIDIgYmFzZSBsb2cKYGBgCgoKYGBge3J9CiMgbmV3X2dlbmVzID0gcm93bmFtZXMoYWNjX2ltbXVuZV9kYXRhKSAlaW4lIHJvd25hbWVzKGFjY19pbW11bmUpCiMgYWNjX2ltbXVuZV9kYXRhLnRwbT1hcHBseShhY2NfaW1tdW5lX2RhdGFbbmV3X2dlbmVzLF0sIDIsIGZ1bmN0aW9uKHgpIHRwbSh4LCBnZW5lc19sZW5ndGhzW25ld19nZW5lc10pKSAlPiVhcy5kYXRhLmZyYW1lKCkKIyBhY2NfaW1tdW5lX2RhdGEudHBtID0gbG9nMihhY2NfaW1tdW5lX2RhdGEudHBtKzEpCmBgYAoKIyBGaWx0ZXJpbmcKYGBge3J9Cm5GZWF0dXJlX1JOQV90aHJlc2hvbGQgPSAxMDAwCnBlcmNlbnQubXRfdGhyZXNob2xkID0gNDAKcHJpbnQgKCJuRmVhdHVyZV9STkEgdGhyZXNob2xkID0gIiAlPiUgcGFzdGUobkZlYXR1cmVfUk5BX3RocmVzaG9sZCkpCnByaW50ICgicGVyY2VudC5tdCB0aHJlc2hvbGQgPSAiICU+JSBwYXN0ZShwZXJjZW50Lm10X3RocmVzaG9sZCkpCgphY2NfaW1tdW5lIDwtIHN1YnNldChhY2NfaW1tdW5lLCBzdWJzZXQgPSBuRmVhdHVyZV9STkEgPiBuRmVhdHVyZV9STkFfdGhyZXNob2xkICYgcGVyY2VudC5tdCA8IHBlcmNlbnQubXRfdGhyZXNob2xkKQphY2NfaW1tdW5lCmBgYAoKIyBQQ0EKYGBge3IgcmVzdWx0cz0naGlkZSd9CiMgSWRlbnRpZmljYXRpb24gb2YgaGlnaGx5IHZhcmlhYmxlIGZlYXR1cmVzCmFjY19pbW11bmUgPC0gRmluZFZhcmlhYmxlRmVhdHVyZXMoYWNjX2ltbXVuZSwgc2VsZWN0aW9uLm1ldGhvZCA9ICJ2c3QiLCBuZmVhdHVyZXMgPSAxNTAwMCkgCgojIFNjYWxpbmcgdGhlIGRhdGEKYWNjX2ltbXVuZSA8LSBTY2FsZURhdGEoYWNjX2ltbXVuZSwgdmFycy50by5yZWdyZXNzID0gYygicGVyY2VudC5tdCIsIm5Db3VudF9STkEiKSkKCiMgUGVyZm9ybSBsaW5lYXIgZGltZW5zaW9uYWwgcmVkdWN0aW9uIChQQ0EpCmFjY19pbW11bmUgPC0gUnVuUENBKGFjY19pbW11bmUsIGZlYXR1cmVzID0gVmFyaWFibGVGZWF0dXJlcyhvYmplY3QgPSBhY2NfaW1tdW5lKSkKCkVsYm93UGxvdChhY2NfaW1tdW5lLCBuZGltcyA9IDUwKSAjIGNoZWNraW5nIHRoZSBkaW1lbnNpb25hbGl0eSAKCmBgYApgYGB7cn0KcGMydXNlPTE6MTAKY2x1c19yZXM9MQpwcmludCgiUENBIGRpbXMgPSAiICU+JSBwYXN0ZShtYXgocGMydXNlKSkpCmBgYAoKIyBVTUFQIGFuZCBjbHVzdGVyaW5nCmBgYHtyfQphY2NfaW1tdW5lIDwtIEZpbmROZWlnaGJvcnMoYWNjX2ltbXVuZSwgZGltcyA9IHBjMnVzZSx2ZXJib3NlID0gRikKYWNjX2ltbXVuZSA8LSBGaW5kQ2x1c3RlcnMoYWNjX2ltbXVuZSwgcmVzb2x1dGlvbiA9IGNsdXNfcmVzLHZlcmJvc2UgPSBGKQoKIyBSdW4gbm9uLWxpbmVhciBkaW1lbnNpb25hbCByZWR1Y3Rpb24gKFVNQVApCmFjY19pbW11bmUgPC0gUnVuVU1BUChhY2NfaW1tdW5lLCBkaW1zID0gcGMydXNlLHZlcmJvc2UgPSBGKQpEaW1QbG90KG9iamVjdCA9IGFjY19pbW11bmUsIHJlZHVjdGlvbiA9ICJ1bWFwIiwgcHQuc2l6ZSA9IDEsIGxhYmVsID0gRikKYGBgCiMgVU1BUFMgIHsudGFic2V0fQpgYGB7ciBlY2hvPVRSVUUsIHJlc3VsdHM9J2FzaXMnfQoKCgojZ2V0IG1ldGVkYXRhCnBsYXRlID0gc3RyX2V4dHJhY3QoY29sbmFtZXMoYWNjX2ltbXVuZSksICJeLiotUFswLTldKiIpCnBhdGllbnQuaWRlbnQgPSBzdHJfZXh0cmFjdChjb2xuYW1lcyhhY2NfaW1tdW5lKSwgIkFDQ1swLTldKiIpCm9yaWdpbiA9IHN0cl9leHRyYWN0KGNvbG5hbWVzKGFjY19pbW11bmUpLCAiTE4iKQpvcmlnaW4gPSBvcmlnaW4gJT4lIHJlcGxhY2VfbmEoJ1ByaW1hcnknKQoKYWNjX2ltbXVuZSA8LSBBZGRNZXRhRGF0YShvYmplY3QgPSBhY2NfaW1tdW5lLCBtZXRhZGF0YSA9IGFzLmZhY3RvcihwYXRpZW50LmlkZW50KSwgY29sLm5hbWUgPSAicGF0aWVudC5pZGVudCIpCmFjY19pbW11bmUgPC0gQWRkTWV0YURhdGEob2JqZWN0ID0gYWNjX2ltbXVuZSwgbWV0YWRhdGEgPSBhcy5mYWN0b3IocGxhdGUpLCBjb2wubmFtZSA9ICJwbGF0ZSIpCmFjY19pbW11bmUgPC0gQWRkTWV0YURhdGEob2JqZWN0ID0gYWNjX2ltbXVuZSwgbWV0YWRhdGEgPSBhcy5mYWN0b3Iob3JpZ2luKSwgY29sLm5hbWUgPSAib3JpZ2luIikKCnByaW50X3RhYihEaW1QbG90KGFjY19pbW11bmUsIHJlZHVjdGlvbiA9ICJ1bWFwIiwgbGFiZWwgPSBGLCBwdC5zaXplID0gMSxncm91cC5ieSA9ICJwYXRpZW50LmlkZW50IikgCix0aXRsZSA9ICJieSBwYXRpZW50IikKcHJpbnRfdGFiKHBsdCA9IAogICAgICAgICAgICBEaW1QbG90KGFjY19pbW11bmUsIHJlZHVjdGlvbiA9ICJ1bWFwIiwgbGFiZWwgPSBGLCBwdC5zaXplID0gMSxncm91cC5ieSA9ICJwbGF0ZSIpIAogICAgICAgICx0aXRsZSA9ICAiYnkgcGxhdGUiKQoKcHJpbnRfdGFiKHBsdCA9CiAgICAgICAgICAgIEZlYXR1cmVQbG90KG9iamVjdCA9IGFjY19pbW11bmUsIGZlYXR1cmVzID0gIlBUUFJDIikKICAgICAgICAgICwKICAgICAgICAgIHRpdGxlID0gIkNENDUiKQpwcmludF90YWIocGx0ID0gCiAgICAgICAgICAgIERpbVBsb3QoYWNjX2ltbXVuZSwgcmVkdWN0aW9uID0gInVtYXAiLCBsYWJlbCA9IEYsIHB0LnNpemUgPSAxLGdyb3VwLmJ5ID0gIm9yaWdpbiIpIAogICAgICAgICx0aXRsZSA9ICAiYnkgb3JpZ2luIikKYGBgCgo=