10x_GSAF_JA21194 Amil Indo1,Indo2
10x_GSAF_JA21309 Amil C1,C2
10x_GSAF_JA21367 Ofav CMF5,CYS5
10x_GSAF_JA21377 Ofav CYS11_6250
10x_GSAF_JA21386 Ofav CYS19C,CYS19S
Note: Indo1, Indo 2 are adult pilots using the LT kit that capture only 500 cells for each sample. Indo 1 and 2 are identical. CMF5, CYS5 are day 5, CMF5 is not treated with 4% cysteine for cleaning mucus 19C and 19S means swimmers and crawlers. Do not use C1 and C2, they’re normal kit, but with trouble in library prep.
Cell Ranger is a set of analysis pipelines that process Chromium single cell data to align reads, generate feature-barcode matrices, perform clustering and other secondary analysis, and more (see list of example workflows and supported libraries). Cell Ranger includes five pipelines relevant to the 3’ Single Cell Gene Expression Solutions and related products:
Install cellranger
get link from https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest?
cd /work/08717/dmflores/ls6/src
curl -o cellranger-7.0.1.tar.gz "https://cf.10xgenomics.com/releases/cell-exp/cellranger-7.1.0.tar.gz?Expires=1675317962&Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiaHR0cHM6Ly9jZi4xMHhnZW5vbWljcy5jb20vcmVsZWFzZXMvY2VsbC1leHAvY2VsbHJhbmdlci03LjEuMC50YXIuZ3oiLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE2NzUzMTc5NjJ9fX1dfQ__&Signature=YvV~jfnLNG6jy~m9xxRIRyiruM1p7jjt0NNGjHLr~2cwuIdJ27VwDWDpVltDujax3qk3nKlIk2e8SviBdj7boA6OUjTeV8Z3VOKt0bJb5zX6OXjmMiEL7uIKxAC5GYrDVOxsB9gOD74l8Fo~F6aDiNo7EtOihr9D35WfUvQgN0BtaHSeU4tUKKZ4wAUF3xItuygLFRCm3ExGoIQ~d3FjXso3nLZHAvxeh2ez1C2CFl6ksuL4XvmfI6ZOkR9Bpo6PSqeuApJYcXdPYPVeAoLEBZWrukeljnDasQjxs1VuAMJD6Gs6hbQn0QUSHPsCHb3~ajpPHbZX8XTWxrNQemrYqg__&Key-Pair-Id=APKAI7S6A5RYOXBWRPDA"
#Unpack
tar -xzvf cellranger-7.0.1.tar.gz
#Prepend the Cell Ranger directory to $PATH (put it in ~/.bashrc)
export PATH=$HOME/bin/cellranger-7.1.0:$PATHAcropora millepora single-cell data 2022.10.11
Make reference
cellranger mkref --genome=amil_genome --fasta=Amil.v2.01.fasta --genes=Amil.all.maker.noseq.gtfGenerate Matrix
# Amil-A-D5
screen
cd /home/yjluo/data/acropora/au_x_au_d5
cellranger count \
--id au_x_au_d5 \
--transcriptome /home/yjluo/work/coral/amil_genome \
--fastqs /home/yjluo/data/acropora/10x_reads/Amil-A-D5 \
--expect-cells 5000
# Amil-A-D7
screen
cd /home/yjluo/data/acropora/au_x_au_d7
cellranger count \
--id au_x_au_d7 \
--transcriptome /home/yjluo/work/coral/amil_genome \
--fastqs /home/yjluo/data/acropora/10x_reads/Amil-A-D7 \
--expect-cells 5000
# Amil-I-D5
screen
cd /home/yjluo/data/acropora/au_x_id_d5
cellranger count \
--id au_x_id_d5 \
--transcriptome /home/yjluo/work/coral/amil_genome \
--fastqs /home/yjluo/data/acropora/10x_reads/Amil-I-D5 \
--expect-cells 5000
# Amil-I-D7
screen
cd /home/yjluo/data/acropora/au_x_id_d7
cellranger count \
--id au_x_id_d7 \
--transcriptome /home/yjluo/work/coral/amil_genome \
--fastqs /home/yjluo/data/acropora/10x_reads/Amil-I-D7 \
--expect-cells 5000Outputs
- Run summary HTML: path/outs/web_summary.html
- Run summary CSV: path/outs/metrics_summary.csv
- BAM: path/outs/possorted_genome_bam.bam
- BAM BAI index: path/outs/possorted_genome_bam.bam.bai
- BAM CSI index: null
- Filtered feature-barcode matrices MEX: path/outs/filtered_feature_bc_matrix
- Filtered feature-barcode matrices HDF5: path/outs/filtered_feature_bc_matrix.h5
- Unfiltered feature-barcode matrices MEX: path/outs/raw_feature_bc_matrix
- Unfiltered feature-barcode matrices HDF5: path/outs/raw_feature_bc_matrix.h5
- Secondary analysis output CSV: path/outs/analysis
- Per-molecule read information: path/outs/molecule_info.h5
- CRISPR-specific analysis: null
- CSP-specific analysis: null
- Loupe Browser file: path/outs/cloupe.cloupe
- Feature Reference: null
- Target Panel File: null
- Probe Set File: null
Copy Web Summary Files to local computer
for NAME Indo1 Indo2
do
scp ${Lonestar}:/work/08717/dmflores/ls6/10x_GSAF_JA21194/cellranger_count_toTranscriptome/${NAME}/outs/web_summary.html .
mv web_summary.html web_summary_${NAME}.html
doneCopy Matrix Files to local computer
for NAME in Indo1 Indo2
do
scp ${Lonestar}:/work/08717/dmflores/ls6/10x_GSAF_JA21194/cellranger_count_toTranscriptome/${NAME}/outs/filtered_feature_bc_matrix .
mv filtered_feature_bc_matrix ffbcm_${NAME}
doneCopy Matrix Files to local computer
These are HDF5 data files, which is an efficient way to pack single cell expression matrices.
for NAME in Indo1 Indo2
do
scp ${Lonestar}:/work/08717/dmflores/ls6/10x_GSAF_JA21194/cellranger_count_toTranscriptome/${NAME}/outs/filtered_feature_bc_matrix.h5 .
mv filtered_feature_bc_matrix ffbcm_h5_${NAME}
donelibrary(patchwork)
library(Seurat)## Attaching SeuratObject
library(hdf5r)setwd("~/Desktop/scAmil")
# Setup the Seurat objects
# Load the dataset
#These are HDF5 data files, which is an efficient way to pack single cell expression matrices.
matrix_I1 <- Read10X_h5("ffbcm_h5_Indo1",use.names = T)
matrix_I2 <- Read10X_h5("ffbcm_h5_Indo2",use.names = T)
# Initialize the Seurat object with the raw (non-normalized data).
Amil1 <- CreateSeuratObject(matrix_I1, project = "Indo1")## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Feature names cannot have pipe characters ('|'), replacing with dashes
## ('-')
Amil2 <- CreateSeuratObject(matrix_I2, project = "Indo2")## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Feature names cannot have pipe characters ('|'), replacing with dashes
## ('-')
#remove Matrix to save memory
rm(matrix_I1)
rm(matrix_I2)#calculate the fractions of mitochondrial genes and ribosomal proteins, and do quick-and-dirty filtering of the datasets
Amil1[["percent.mt"]] <- PercentageFeatureSet(Amil1, pattern = "^MT-")
Amil1[["percent.rbp"]] <- PercentageFeatureSet(Amil1, pattern = "^RP[SL]")
Amil2[["percent.mt"]] <- PercentageFeatureSet(Amil2, pattern = "^MT-")
Amil2[["percent.rbp"]] <- PercentageFeatureSet(Amil2, pattern = "^RP[SL]")
VlnPlot(Amil1, features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rbp"), ncol = 4)## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.mt.
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.rbp.
VlnPlot(Amil2, features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rbp"), ncol=4)## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.mt.
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.rbp.
#cell annotations similarity
table(rownames(Amil1) %in% rownames(Amil2)) ##
## TRUE
## 39291
#True:39291
#Quick filtering of the datasets removes dying cells and putative doublets:
Am1 <- subset(Amil1, subset = nFeature_RNA > 500 & nFeature_RNA < 5000)
Am2 <- subset(Amil2, subset = nFeature_RNA > 500 & nFeature_RNA < 5000)
rm(Amil1)
rm(Amil2)#Make R List of objects
Amil_list<-list()
Amil_list[["Am1"]] <- Am1
Amil_list[["Am2"]] <- Am2
# normalize/find HVG for each:
for (i in 1:length(Amil_list)) {
Amil_list[[i]] <- NormalizeData(Amil_list[[i]], verbose = F)
Amil_list[[i]] <- FindVariableFeatures(Amil_list[[i]], selection.method = "vst", nfeatures = 2000, verbose = F)
}## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.3913
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.32311
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 8.6449e-28
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.4269
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.32306
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.5402e-27
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
#Integration
Amil_anchors <- FindIntegrationAnchors(object.list = Amil_list, dims = 1:30)## Warning in CheckDuplicateCellNames(object.list = object.list): Some cell names
## are duplicated across objects provided. Renaming to enforce unique cell names.
## Computing 2000 integration features
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 942 anchors
## Filtering anchors
## Retained 929 anchors
Amil_seurat <- IntegrateData(anchorset = Amil_anchors, dims = 1:30)## Merging dataset 1 into 2
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
##Seurat integration creates a unified object that contains both original data (‘RNA’ assay) as well as integrated data (‘integrated’ assay).
rm(Amil_list)
rm(Amil_anchors)#Visualize dataset before integration
DefaultAssay(Amil_seurat) <- "RNA"
Amil_seurat <- NormalizeData(Amil_seurat, verbose = F)
Amil_seurat <- FindVariableFeatures(Amil_seurat, selection.method = "vst", nfeatures = 2000, verbose = F)## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.712
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.50068
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.5544e-15
Amil_seurat <- ScaleData(Amil_seurat, verbose = F)
Amil_seurat <- RunPCA(Amil_seurat, npcs = 30, verbose = F)
Amil_seurat <- RunUMAP(Amil_seurat, reduction = "pca", dims = 1:30, verbose = F)## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
DimPlot(Amil_seurat,reduction = "umap") + plot_annotation(title = "Amil Indo1 and Amil Indo2 cells, before integration")#Visualize dataset after integration
DefaultAssay(Amil_seurat) <- "integrated"
Amil_seurat <- ScaleData(Amil_seurat, verbose = F)
Amil_seurat <- RunPCA(Amil_seurat, npcs = 30, verbose = F)
Amil_seurat <- RunUMAP(Amil_seurat, reduction = "pca", dims = 1:30, verbose = F)
DimPlot(Amil_seurat, reduction = "umap") + plot_annotation(title = "Amil Indo1 and Amil Indo2 cells, after integration")#SplitPlot
DimPlot(Amil_seurat, reduction = "umap", split.by = "orig.ident") + NoLegend()#Clusters
Amil_seurat <- FindNeighbors(Amil_seurat, dims = 1:30, k.param = 10, verbose = F)
Amil_seurat <- FindClusters(Amil_seurat, verbose = F)
DimPlot(Amil_seurat,label = T) + NoLegend()Run Plot_integrated_clusters function in Appendix
#Number of cells in cluster
count_table <- table(Amil_seurat@meta.data$seurat_clusters, Amil_seurat@meta.data$orig.ident)
count_table##
## Indo1 Indo2
## 0 58 52
## 1 38 55
## 2 34 43
## 3 26 39
## 4 32 25
## 5 15 11
## 6 8 12
## 7 10 6
## 8 7 5
## 9 6 6
plot_integrated_clusters(Amil_seurat) str(Am1)## Formal class 'Seurat' [package "SeuratObject"] with 13 slots
## ..@ assays :List of 1
## .. ..$ RNA:Formal class 'Assay' [package "SeuratObject"] with 8 slots
## .. .. .. ..@ counts :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. .. .. .. ..@ i : int [1:191526] 85 131 171 172 177 269 277 358 401 481 ...
## .. .. .. .. .. ..@ p : int [1:235] 0 729 1542 2111 3513 4925 5647 6472 7076 7604 ...
## .. .. .. .. .. ..@ Dim : int [1:2] 39291 234
## .. .. .. .. .. ..@ Dimnames:List of 2
## .. .. .. .. .. .. ..$ : chr [1:39291] "isotig09765-gfpl-3" "isotig01446-gfpl-2" "isotig12578-nfcp-3" "isotig15553-gfpl-4" ...
## .. .. .. .. .. .. ..$ : chr [1:234] "AATCACGAGCCTCTTC-1" "AATCACGCAATCTCGA-1" "AATCACGCACCCTCTA-1" "AATCACGCACTAAACC-1" ...
## .. .. .. .. .. ..@ x : num [1:191526] 1 1 1 1 1 1 1 3 1 2 ...
## .. .. .. .. .. ..@ factors : list()
## .. .. .. ..@ data :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. .. .. .. .. ..@ i : int [1:191526] 85 131 171 172 177 269 277 358 401 481 ...
## .. .. .. .. .. ..@ p : int [1:235] 0 729 1542 2111 3513 4925 5647 6472 7076 7604 ...
## .. .. .. .. .. ..@ Dim : int [1:2] 39291 234
## .. .. .. .. .. ..@ Dimnames:List of 2
## .. .. .. .. .. .. ..$ : chr [1:39291] "isotig09765-gfpl-3" "isotig01446-gfpl-2" "isotig12578-nfcp-3" "isotig15553-gfpl-4" ...
## .. .. .. .. .. .. ..$ : chr [1:234] "AATCACGAGCCTCTTC-1" "AATCACGCAATCTCGA-1" "AATCACGCACCCTCTA-1" "AATCACGCACTAAACC-1" ...
## .. .. .. .. .. ..@ x : num [1:191526] 1 1 1 1 1 1 1 3 1 2 ...
## .. .. .. .. .. ..@ factors : list()
## .. .. .. ..@ scale.data : num[0 , 0 ]
## .. .. .. ..@ key : chr "rna_"
## .. .. .. ..@ assay.orig : NULL
## .. .. .. ..@ var.features : chr(0)
## .. .. .. ..@ meta.features:'data.frame': 39291 obs. of 0 variables
## .. .. .. ..@ misc : list()
## ..@ meta.data :'data.frame': 234 obs. of 5 variables:
## .. ..$ orig.ident : Factor w/ 1 level "Indo1": 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ nCount_RNA : num [1:234] 1130 1408 1786 3335 4104 ...
## .. ..$ nFeature_RNA: int [1:234] 729 813 569 1402 1412 722 825 604 528 926 ...
## .. ..$ percent.mt : num [1:234] 0 0 0 0 0 0 0 0 0 0 ...
## .. ..$ percent.rbp : num [1:234] 0 0 0 0 0 0 0 0 0 0 ...
## ..@ active.assay: chr "RNA"
## ..@ active.ident: Factor w/ 1 level "Indo1": 1 1 1 1 1 1 1 1 1 1 ...
## .. ..- attr(*, "names")= chr [1:234] "AATCACGAGCCTCTTC-1" "AATCACGCAATCTCGA-1" "AATCACGCACCCTCTA-1" "AATCACGCACTAAACC-1" ...
## ..@ graphs : list()
## ..@ neighbors : list()
## ..@ reductions : list()
## ..@ images : list()
## ..@ project.name: chr "Indo1"
## ..@ misc : list()
## ..@ version :Classes 'package_version', 'numeric_version' hidden list of 1
## .. ..$ : int [1:3] 4 1 3
## ..@ commands : list()
## ..@ tools : list()
meta <- Am1@meta.data
dim(meta)## [1] 234 5
head(meta)## orig.ident nCount_RNA nFeature_RNA percent.mt percent.rbp
## AATCACGAGCCTCTTC-1 Indo1 1130 729 0 0
## AATCACGCAATCTCGA-1 Indo1 1408 813 0 0
## AATCACGCACCCTCTA-1 Indo1 1786 569 0 0
## AATCACGCACTAAACC-1 Indo1 3335 1402 0 0
## AATCACGGTTAAGCAA-1 Indo1 4104 1412 0 0
## ACAGAAAAGCCTCTTC-1 Indo1 1082 722 0 0
FeatureScatter(Am1, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
#Normalize Data
srat <- NormalizeData(Am1)
#Find most variable
srat <- FindVariableFeatures(srat, selection.method = "vst", nfeatures = 2000)## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.3913
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.32311
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 8.6449e-28
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
top10 <- head(VariableFeatures(srat), 10)
top10## [1] "c022217-cryab-2" "c031651-amil-12752" "c040363-amil-1117"
## [4] "c005163-hs71a" "c036692-scr3" "c013092-alma7-5"
## [7] "c031631-amil-6824" "c016035-npc2-2" "c025547-co6a1-2"
## [10] "c021610-angl6-3"
#Normalize Data
srat2 <- NormalizeData(Am2)
#Find most variable
srat2 <- FindVariableFeatures(srat2, selection.method = "vst", nfeatures = 2000)## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.4269
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.32306
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.5402e-27
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
top10_indo2 <- head(VariableFeatures(srat2), 10)
top10_indo2## [1] "c005951-pxdn-6" "c031651-amil-12752" "c024979-cthr1-11"
## [4] "c027179-amil-8816" "c016035-npc2-2" "c032789-tasp1-3"
## [7] "c002932-trfm" "c036692-scr3" "c008174-gbp1"
## [10] "c010251-amil-7593"
plot1 <- VariableFeaturePlot(srat)
LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge = 0, ynudge = 0)## Warning: Transformation introduced infinite values in continuous x-axis
plot2 <- VariableFeaturePlot(srat2)
LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge = 0, ynudge = 0)## Warning: Transformation introduced infinite values in continuous x-axis
plot_integrated_clusters = function (srat) {
## take an integrated Seurat object, plot distributions over orig.ident
library(Seurat)
library(patchwork)
library(ggplot2)
library(reshape2)
library(RColorBrewer)
count_table <- table(srat@meta.data$seurat_clusters, srat@meta.data$orig.ident)
count_mtx <- as.data.frame.matrix(count_table)
count_mtx$cluster <- rownames(count_mtx)
melt_mtx <- melt(count_mtx)
melt_mtx$cluster <- as.factor(melt_mtx$cluster)
cluster_size <- aggregate(value ~ cluster, data = melt_mtx, FUN = sum)
sorted_labels <- paste(sort(as.integer(levels(cluster_size$cluster)),decreasing = T))
cluster_size$cluster <- factor(cluster_size$cluster,levels = sorted_labels)
melt_mtx$cluster <- factor(melt_mtx$cluster,levels = sorted_labels)
colnames(melt_mtx)[2] <- "dataset"
p1 <- ggplot(cluster_size, aes(y= cluster,x = value)) + geom_bar(position="dodge", stat="identity",fill = "grey60") +
theme_bw() + scale_x_log10() + xlab("Cells per cluster, log10 scale") + ylab("")
p2 <- ggplot(melt_mtx,aes(x=cluster,y=value,fill=dataset)) +
geom_bar(position="fill", stat="identity") + theme_bw() + coord_flip() +
scale_fill_brewer(palette = "Set2") +
ylab("Fraction of cells in each dataset") + xlab("Cluster number") + theme(legend.position="top")
p2 + p1 + plot_layout(widths = c(3,1))
}