13.1 Experiments

  • 10x_GSAF_JA21194 Amil Indo1,Indo2

  • 10x_GSAF_JA21309 Amil C1,C2

  • 10x_GSAF_JA21367 Ofav CMF5,CYS5

  • 10x_GSAF_JA21377 Ofav CYS11_6250

  • 10x_GSAF_JA21386 Ofav CYS19C,CYS19S

Note: Indo1, Indo 2 are adult pilots using the LT kit that capture only 500 cells for each sample. Indo 1 and 2 are identical. CMF5, CYS5 are day 5, CMF5 is not treated with 4% cysteine for cleaning mucus 19C and 19S means swimmers and crawlers. Do not use C1 and C2, they’re normal kit, but with trouble in library prep.

14 Process scRNA reads

Cell Ranger is a set of analysis pipelines that process Chromium single cell data to align reads, generate feature-barcode matrices, perform clustering and other secondary analysis, and more (see list of example workflows and supported libraries). Cell Ranger includes five pipelines relevant to the 3’ Single Cell Gene Expression Solutions and related products:

Install cellranger

get link from https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest?

cd /work/08717/dmflores/ls6/src
curl -o cellranger-7.0.1.tar.gz "https://cf.10xgenomics.com/releases/cell-exp/cellranger-7.1.0.tar.gz?Expires=1675317962&Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiaHR0cHM6Ly9jZi4xMHhnZW5vbWljcy5jb20vcmVsZWFzZXMvY2VsbC1leHAvY2VsbHJhbmdlci03LjEuMC50YXIuZ3oiLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE2NzUzMTc5NjJ9fX1dfQ__&Signature=YvV~jfnLNG6jy~m9xxRIRyiruM1p7jjt0NNGjHLr~2cwuIdJ27VwDWDpVltDujax3qk3nKlIk2e8SviBdj7boA6OUjTeV8Z3VOKt0bJb5zX6OXjmMiEL7uIKxAC5GYrDVOxsB9gOD74l8Fo~F6aDiNo7EtOihr9D35WfUvQgN0BtaHSeU4tUKKZ4wAUF3xItuygLFRCm3ExGoIQ~d3FjXso3nLZHAvxeh2ez1C2CFl6ksuL4XvmfI6ZOkR9Bpo6PSqeuApJYcXdPYPVeAoLEBZWrukeljnDasQjxs1VuAMJD6Gs6hbQn0QUSHPsCHb3~ajpPHbZX8XTWxrNQemrYqg__&Key-Pair-Id=APKAI7S6A5RYOXBWRPDA"

#Unpack
tar -xzvf cellranger-7.0.1.tar.gz

#Prepend the Cell Ranger directory to $PATH (put it in ~/.bashrc) 
export PATH=$HOME/bin/cellranger-7.1.0:$PATH

Acropora millepora single-cell data 2022.10.11

Make reference

cellranger mkref --genome=amil_genome --fasta=Amil.v2.01.fasta --genes=Amil.all.maker.noseq.gtf

Generate Matrix

# Amil-A-D5
screen
cd /home/yjluo/data/acropora/au_x_au_d5
cellranger count \
--id au_x_au_d5 \
--transcriptome /home/yjluo/work/coral/amil_genome \
--fastqs /home/yjluo/data/acropora/10x_reads/Amil-A-D5 \
--expect-cells 5000

# Amil-A-D7
screen
cd /home/yjluo/data/acropora/au_x_au_d7
cellranger count \
--id au_x_au_d7 \
--transcriptome /home/yjluo/work/coral/amil_genome \
--fastqs /home/yjluo/data/acropora/10x_reads/Amil-A-D7 \
--expect-cells 5000

# Amil-I-D5
screen
cd /home/yjluo/data/acropora/au_x_id_d5
cellranger count \
--id au_x_id_d5 \
--transcriptome /home/yjluo/work/coral/amil_genome \
--fastqs /home/yjluo/data/acropora/10x_reads/Amil-I-D5 \
--expect-cells 5000

# Amil-I-D7
screen
cd /home/yjluo/data/acropora/au_x_id_d7
cellranger count \
--id au_x_id_d7 \
--transcriptome /home/yjluo/work/coral/amil_genome \
--fastqs /home/yjluo/data/acropora/10x_reads/Amil-I-D7 \
--expect-cells 5000

Outputs

- Run summary HTML:                         path/outs/web_summary.html
- Run summary CSV:                          path/outs/metrics_summary.csv
- BAM:                                      path/outs/possorted_genome_bam.bam
- BAM BAI index:                            path/outs/possorted_genome_bam.bam.bai
- BAM CSI index:                            null
- Filtered feature-barcode matrices MEX:    path/outs/filtered_feature_bc_matrix
- Filtered feature-barcode matrices HDF5:   path/outs/filtered_feature_bc_matrix.h5
- Unfiltered feature-barcode matrices MEX:  path/outs/raw_feature_bc_matrix
- Unfiltered feature-barcode matrices HDF5: path/outs/raw_feature_bc_matrix.h5
- Secondary analysis output CSV:            path/outs/analysis
- Per-molecule read information:            path/outs/molecule_info.h5
- CRISPR-specific analysis:                 null
- CSP-specific analysis:                    null
- Loupe Browser file:                       path/outs/cloupe.cloupe
- Feature Reference:                        null
- Target Panel File:                        null
- Probe Set File:                           null

14.1 Amil Indo1 and Indo2

Copy Web Summary Files to local computer

for NAME Indo1 Indo2
do
scp ${Lonestar}:/work/08717/dmflores/ls6/10x_GSAF_JA21194/cellranger_count_toTranscriptome/${NAME}/outs/web_summary.html .
mv web_summary.html web_summary_${NAME}.html
done

Copy Matrix Files to local computer

for NAME in Indo1 Indo2
do
scp ${Lonestar}:/work/08717/dmflores/ls6/10x_GSAF_JA21194/cellranger_count_toTranscriptome/${NAME}/outs/filtered_feature_bc_matrix .
mv filtered_feature_bc_matrix ffbcm_${NAME}
done

Copy Matrix Files to local computer

These are HDF5 data files, which is an efficient way to pack single cell expression matrices.

for NAME in Indo1 Indo2
do
scp ${Lonestar}:/work/08717/dmflores/ls6/10x_GSAF_JA21194/cellranger_count_toTranscriptome/${NAME}/outs/filtered_feature_bc_matrix.h5 .
mv filtered_feature_bc_matrix ffbcm_h5_${NAME}
done

15 Amil Indo1 & Indo2 in R

library(patchwork)
library(Seurat)
## Attaching SeuratObject
library(hdf5r)
setwd("~/Desktop/scAmil")
# Setup the Seurat objects
# Load the dataset
#These are HDF5 data files, which is an efficient way to pack single cell expression matrices.
matrix_I1 <- Read10X_h5("ffbcm_h5_Indo1",use.names = T)
matrix_I2 <- Read10X_h5("ffbcm_h5_Indo2",use.names = T)

# Initialize the Seurat object with the raw (non-normalized data).
Amil1 <- CreateSeuratObject(matrix_I1, project = "Indo1")
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Feature names cannot have pipe characters ('|'), replacing with dashes
## ('-')
Amil2 <- CreateSeuratObject(matrix_I2, project = "Indo2")
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')

## Warning: Feature names cannot have pipe characters ('|'), replacing with dashes
## ('-')
#remove Matrix to save memory
rm(matrix_I1)
rm(matrix_I2)
#calculate the fractions of mitochondrial genes and ribosomal proteins, and do quick-and-dirty filtering of the datasets
Amil1[["percent.mt"]]  <- PercentageFeatureSet(Amil1, pattern = "^MT-")
Amil1[["percent.rbp"]] <- PercentageFeatureSet(Amil1, pattern = "^RP[SL]")
Amil2[["percent.mt"]]  <- PercentageFeatureSet(Amil2, pattern = "^MT-")
Amil2[["percent.rbp"]] <- PercentageFeatureSet(Amil2, pattern = "^RP[SL]")

VlnPlot(Amil1, features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rbp"), ncol = 4)
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.mt.
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.rbp.

VlnPlot(Amil2, features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rbp"), ncol=4)
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.mt.

## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.rbp.

#cell annotations similarity
table(rownames(Amil1) %in% rownames(Amil2)) 
## 
##  TRUE 
## 39291
#True:39291

#Quick filtering of the datasets removes dying cells and putative doublets:
Am1 <- subset(Amil1, subset = nFeature_RNA > 500 & nFeature_RNA < 5000)
Am2 <- subset(Amil2, subset = nFeature_RNA > 500 & nFeature_RNA < 5000)

rm(Amil1)
rm(Amil2)
#Make R List of objects
Amil_list<-list()
Amil_list[["Am1"]] <- Am1
Amil_list[["Am2"]] <- Am2
# normalize/find HVG for each:
for (i in 1:length(Amil_list)) {
  Amil_list[[i]] <- NormalizeData(Amil_list[[i]], verbose = F)
  Amil_list[[i]] <- FindVariableFeatures(Amil_list[[i]], selection.method = "vst", nfeatures = 2000, verbose = F)
}
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.3913
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.32311
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 8.6449e-28
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.4269
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.32306
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.5402e-27
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
#Integration
Amil_anchors <- FindIntegrationAnchors(object.list = Amil_list, dims = 1:30)
## Warning in CheckDuplicateCellNames(object.list = object.list): Some cell names
## are duplicated across objects provided. Renaming to enforce unique cell names.
## Computing 2000 integration features
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 942 anchors
## Filtering anchors
##  Retained 929 anchors
Amil_seurat  <- IntegrateData(anchorset = Amil_anchors, dims = 1:30)
## Merging dataset 1 into 2
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
##Seurat integration creates a unified object that contains both original data (‘RNA’ assay) as well as integrated data (‘integrated’ assay).
rm(Amil_list)
rm(Amil_anchors)

15.0.1 Before Integration

#Visualize dataset before integration
DefaultAssay(Amil_seurat) <- "RNA"
Amil_seurat <- NormalizeData(Amil_seurat, verbose = F)
Amil_seurat <- FindVariableFeatures(Amil_seurat, selection.method = "vst", nfeatures = 2000, verbose = F)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.712
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.50068
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.5544e-15
Amil_seurat <- ScaleData(Amil_seurat, verbose = F)
Amil_seurat <- RunPCA(Amil_seurat, npcs = 30, verbose = F)
Amil_seurat <- RunUMAP(Amil_seurat, reduction = "pca", dims = 1:30, verbose = F)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
DimPlot(Amil_seurat,reduction = "umap") + plot_annotation(title = "Amil Indo1 and Amil Indo2 cells, before integration")

15.0.2 After Integration

#Visualize dataset after integration
DefaultAssay(Amil_seurat) <- "integrated"
Amil_seurat <- ScaleData(Amil_seurat, verbose = F)
Amil_seurat <- RunPCA(Amil_seurat, npcs = 30, verbose = F)
Amil_seurat <- RunUMAP(Amil_seurat, reduction = "pca", dims = 1:30, verbose = F)

DimPlot(Amil_seurat, reduction = "umap") + plot_annotation(title = "Amil Indo1 and Amil Indo2 cells, after integration")

15.0.2.1 Split Plot

#SplitPlot
DimPlot(Amil_seurat, reduction = "umap", split.by = "orig.ident") + NoLegend()

15.1 Clusters

#Clusters
Amil_seurat <- FindNeighbors(Amil_seurat, dims = 1:30, k.param = 10, verbose = F)
Amil_seurat <- FindClusters(Amil_seurat, verbose = F)
DimPlot(Amil_seurat,label = T) + NoLegend()

Run Plot_integrated_clusters function in Appendix

#Number of cells in cluster
count_table <- table(Amil_seurat@meta.data$seurat_clusters, Amil_seurat@meta.data$orig.ident)
count_table
##    
##     Indo1 Indo2
##   0    58    52
##   1    38    55
##   2    34    43
##   3    26    39
##   4    32    25
##   5    15    11
##   6     8    12
##   7    10     6
##   8     7     5
##   9     6     6
plot_integrated_clusters(Amil_seurat) 

15.2 Explore Data

str(Am1)
## Formal class 'Seurat' [package "SeuratObject"] with 13 slots
##   ..@ assays      :List of 1
##   .. ..$ RNA:Formal class 'Assay' [package "SeuratObject"] with 8 slots
##   .. .. .. ..@ counts       :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
##   .. .. .. .. .. ..@ i       : int [1:191526] 85 131 171 172 177 269 277 358 401 481 ...
##   .. .. .. .. .. ..@ p       : int [1:235] 0 729 1542 2111 3513 4925 5647 6472 7076 7604 ...
##   .. .. .. .. .. ..@ Dim     : int [1:2] 39291 234
##   .. .. .. .. .. ..@ Dimnames:List of 2
##   .. .. .. .. .. .. ..$ : chr [1:39291] "isotig09765-gfpl-3" "isotig01446-gfpl-2" "isotig12578-nfcp-3" "isotig15553-gfpl-4" ...
##   .. .. .. .. .. .. ..$ : chr [1:234] "AATCACGAGCCTCTTC-1" "AATCACGCAATCTCGA-1" "AATCACGCACCCTCTA-1" "AATCACGCACTAAACC-1" ...
##   .. .. .. .. .. ..@ x       : num [1:191526] 1 1 1 1 1 1 1 3 1 2 ...
##   .. .. .. .. .. ..@ factors : list()
##   .. .. .. ..@ data         :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
##   .. .. .. .. .. ..@ i       : int [1:191526] 85 131 171 172 177 269 277 358 401 481 ...
##   .. .. .. .. .. ..@ p       : int [1:235] 0 729 1542 2111 3513 4925 5647 6472 7076 7604 ...
##   .. .. .. .. .. ..@ Dim     : int [1:2] 39291 234
##   .. .. .. .. .. ..@ Dimnames:List of 2
##   .. .. .. .. .. .. ..$ : chr [1:39291] "isotig09765-gfpl-3" "isotig01446-gfpl-2" "isotig12578-nfcp-3" "isotig15553-gfpl-4" ...
##   .. .. .. .. .. .. ..$ : chr [1:234] "AATCACGAGCCTCTTC-1" "AATCACGCAATCTCGA-1" "AATCACGCACCCTCTA-1" "AATCACGCACTAAACC-1" ...
##   .. .. .. .. .. ..@ x       : num [1:191526] 1 1 1 1 1 1 1 3 1 2 ...
##   .. .. .. .. .. ..@ factors : list()
##   .. .. .. ..@ scale.data   : num[0 , 0 ] 
##   .. .. .. ..@ key          : chr "rna_"
##   .. .. .. ..@ assay.orig   : NULL
##   .. .. .. ..@ var.features : chr(0) 
##   .. .. .. ..@ meta.features:'data.frame':   39291 obs. of  0 variables
##   .. .. .. ..@ misc         : list()
##   ..@ meta.data   :'data.frame': 234 obs. of  5 variables:
##   .. ..$ orig.ident  : Factor w/ 1 level "Indo1": 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..$ nCount_RNA  : num [1:234] 1130 1408 1786 3335 4104 ...
##   .. ..$ nFeature_RNA: int [1:234] 729 813 569 1402 1412 722 825 604 528 926 ...
##   .. ..$ percent.mt  : num [1:234] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ percent.rbp : num [1:234] 0 0 0 0 0 0 0 0 0 0 ...
##   ..@ active.assay: chr "RNA"
##   ..@ active.ident: Factor w/ 1 level "Indo1": 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..- attr(*, "names")= chr [1:234] "AATCACGAGCCTCTTC-1" "AATCACGCAATCTCGA-1" "AATCACGCACCCTCTA-1" "AATCACGCACTAAACC-1" ...
##   ..@ graphs      : list()
##   ..@ neighbors   : list()
##   ..@ reductions  : list()
##   ..@ images      : list()
##   ..@ project.name: chr "Indo1"
##   ..@ misc        : list()
##   ..@ version     :Classes 'package_version', 'numeric_version'  hidden list of 1
##   .. ..$ : int [1:3] 4 1 3
##   ..@ commands    : list()
##   ..@ tools       : list()
meta <- Am1@meta.data
dim(meta)
## [1] 234   5
head(meta)
##                    orig.ident nCount_RNA nFeature_RNA percent.mt percent.rbp
## AATCACGAGCCTCTTC-1      Indo1       1130          729          0           0
## AATCACGCAATCTCGA-1      Indo1       1408          813          0           0
## AATCACGCACCCTCTA-1      Indo1       1786          569          0           0
## AATCACGCACTAAACC-1      Indo1       3335         1402          0           0
## AATCACGGTTAAGCAA-1      Indo1       4104         1412          0           0
## ACAGAAAAGCCTCTTC-1      Indo1       1082          722          0           0
FeatureScatter(Am1, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

15.3 Top 10 Most Variable

#Normalize Data
srat <- NormalizeData(Am1)
#Find most variable
srat <- FindVariableFeatures(srat, selection.method = "vst", nfeatures = 2000)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.3913
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.32311
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 8.6449e-28
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
top10 <- head(VariableFeatures(srat), 10)
top10
##  [1] "c022217-cryab-2"    "c031651-amil-12752" "c040363-amil-1117" 
##  [4] "c005163-hs71a"      "c036692-scr3"       "c013092-alma7-5"   
##  [7] "c031631-amil-6824"  "c016035-npc2-2"     "c025547-co6a1-2"   
## [10] "c021610-angl6-3"
#Normalize Data
srat2 <- NormalizeData(Am2)
#Find most variable
srat2 <- FindVariableFeatures(srat2, selection.method = "vst", nfeatures = 2000)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -2.4269
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.32306
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.5402e-27
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090619
top10_indo2 <- head(VariableFeatures(srat2), 10)
top10_indo2
##  [1] "c005951-pxdn-6"     "c031651-amil-12752" "c024979-cthr1-11"  
##  [4] "c027179-amil-8816"  "c016035-npc2-2"     "c032789-tasp1-3"   
##  [7] "c002932-trfm"       "c036692-scr3"       "c008174-gbp1"      
## [10] "c010251-amil-7593"
plot1 <- VariableFeaturePlot(srat)
LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge = 0, ynudge = 0)
## Warning: Transformation introduced infinite values in continuous x-axis

plot2 <- VariableFeaturePlot(srat2)
LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge = 0, ynudge = 0)
## Warning: Transformation introduced infinite values in continuous x-axis

16 Appendix

16.1 Plot_integrated_clusters function

Source: https://github.com/cellgeni/notebooks/blob/309fa36e58db8a3182a86ef6e344862795684c3d/data/custom_seurat_functions.R

plot_integrated_clusters = function (srat) { 
  ## take an integrated Seurat object, plot distributions over orig.ident
  library(Seurat)
  library(patchwork)
  library(ggplot2)
  library(reshape2)
  library(RColorBrewer)
  
  
  count_table <- table(srat@meta.data$seurat_clusters, srat@meta.data$orig.ident)
  count_mtx   <- as.data.frame.matrix(count_table)
  count_mtx$cluster <- rownames(count_mtx)
  melt_mtx    <- melt(count_mtx)
  melt_mtx$cluster <- as.factor(melt_mtx$cluster)
  
  cluster_size   <- aggregate(value ~ cluster, data = melt_mtx, FUN = sum)
  
  sorted_labels <- paste(sort(as.integer(levels(cluster_size$cluster)),decreasing = T))
  cluster_size$cluster <- factor(cluster_size$cluster,levels = sorted_labels)
  melt_mtx$cluster <- factor(melt_mtx$cluster,levels = sorted_labels)
  
  colnames(melt_mtx)[2] <- "dataset"
  
  
  p1 <- ggplot(cluster_size, aes(y= cluster,x = value)) + geom_bar(position="dodge", stat="identity",fill = "grey60") + 
    theme_bw() + scale_x_log10() + xlab("Cells per cluster, log10 scale") + ylab("")
  p2 <- ggplot(melt_mtx,aes(x=cluster,y=value,fill=dataset)) + 
    geom_bar(position="fill", stat="identity") + theme_bw() + coord_flip() + 
    scale_fill_brewer(palette = "Set2") +
    ylab("Fraction of cells in each dataset") + xlab("Cluster number") + theme(legend.position="top")
  
  p2 + p1 + plot_layout(widths = c(3,1))
}