Single-cell ATAC-seq Project

2024-12-16

true, true

Authors:

Acknowledgement: Modern GPT tools (ChatGPT, PerplexityAI and Github Copilot) were frequently used to explore various strategies, troubleshooting codes and as a replacement of search engines.

Introduction

Setup Conda Environment

We will setup the conda environment from the yml file located in ../env/environment.yml file.

# environment.yml
name: single-cell2
channels: 
  - conda-forge
  - bioconda
  - paul.martin-2
  - bu_cnio 
  - r
dependencies: 
  - r-base>=4.0.0
  - r-seurat=4.0.1 
  - r-doubletfinder
  - r-monocle3
  - bioconductor-singler
  - r-enrichr
  - r-seuratwrappers 
  - r-tidyverse
  - r-ggplot2
  - bioconductor-celldex
  - r-devtools
  - bioconductor-complexheatmap
  - r-ggpubr
  - r-matrix
  - r-parallelly  # For advanced parallel processing
  - r-future  # For multi-session parallel execution
  - r-future.apply  # Parallel apply functions
  - r-biocmanager  # Required for Bioconductor packages
  - r-remotes      # Required for installing ArchR from GitHub

  # Dependencies for ArchR
  - bioconductor-chromvar
  - bioconductor-motifmatchr
  - bioconductor-bsgenome
  - bioconductor-txdb.hsapiens.ucsc.hg19.knowngene
  - bioconductor-org.hs.eg.db
  - bioconductor-bsgenome.hsapiens.ucsc.hg19
  - bioconductor-rsamtools
  - bioconductor-genomicalignments
  - bioconductor-genomicranges
  - bioconductor-biostrings
  - r-data.table
  - bioconductor-rhdf5
  - r-patchwork

Install ArchR and dependencies

# Load necessary package managers silently
suppressMessages(suppressWarnings({
  if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
  if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools")
}))

# Function to check and install a Bioconductor package if not already installed
install_bioc_package <- function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    suppressMessages(suppressWarnings({
      BiocManager::install(pkg, ask = FALSE, update = FALSE)
    }))
  }
}

# Function to check and install a GitHub package if not already installed
install_github_package <- function(repo, ref = "master") {
  package_name <- basename(repo) # Extract the package name from the repo
  if (!requireNamespace(package_name, quietly = TRUE)) {
    suppressMessages(suppressWarnings({
      devtools::install_github(repo, ref = ref, quiet = TRUE)
    }))
  }
}

# Install Bioconductor dependencies if not already installed
install_bioc_package("motifmatchr")
## 
install_bioc_package("chromVAR")

# Install ArchR from GitHub if not already installed
install_github_package("GreenleafLab/ArchR")

# Install extra ArchR dependencies if not already installed
if (!requireNamespace("ArchR", quietly = TRUE)) {
  suppressMessages(suppressWarnings({
    ArchR::installExtraPackages()
  }))
}

Load libraries

packages <- c(
  "Seurat", "DoubletFinder", "monocle3", "SingleR", "enrichR", 
  "SeuratWrappers", "tidyverse", "ggplot2", "celldex", "devtools", 
  "ComplexHeatmap", "ggpubr", "Matrix", "parallel", "parallelly", 
  "future", "future.apply", "BiocManager", "remotes", 
  "chromVAR", "motifmatchr", "BSgenome", 
  "TxDb.Hsapiens.UCSC.hg19.knownGene", "org.Hs.eg.db", 
  "BSgenome.Hsapiens.UCSC.hg19", "Rsamtools", 
  "GenomicAlignments", "GenomicRanges", "Biostrings", 
  "data.table", "rhdf5", "patchwork", "ArchR"
)


# load all packages
errors <- sapply(packages, function(pkg) {
  tryCatch({
    suppressMessages(suppressWarnings(library(pkg, character.only = TRUE)))
    TRUE
  }, error = function(e) {
    FALSE
  })
})

# Report results
failed <- packages[!errors]
if (length(failed) > 0) {
  cat("The following packages failed to load:\n")
  print(failed)
} else {
  cat("All packages loaded successfully!\n")
}
## All packages loaded successfully!

Week 01

1. Preprocessing and Quality Control (5P)

1.1. Set up the Environment

1.1.1. Download data

# Define the data directory and file paths
data_dir <- 'data'
zip_file <- file.path(data_dir, 'scbi_p2.zip') # Place the zip file in the 'data' directory

# Create the data directory if it does not exist
dir.create(data_dir, showWarnings = FALSE)

# Check if the file exists before downloading
if (!file.exists(zip_file)) {
  download.file(
    url = 'https://icbb-share.s3.eu-central-1.amazonaws.com/single-cell-bioinformatics/scbi_p2.zip', 
    destfile = zip_file
  )
  unzip(zipfile = zip_file, exdir = data_dir) # Extract contents to the 'data' directory
} else {
  cat("The file already exists.")
}
## The file already exists.

1.1.2. Setup parallel

# Use 7/8th of all cpu cores for multithreading (advice from the ArchR reference)
addArchRThreads(threads = as.integer(7/8 * detectCores()))
## Setting default number of Parallel threads to 112.

1.1.3. Set the reference (default) genome for this project to the precompiled hg38 genome (explanation):

addArchRGenome("hg38")
## Setting default genome to Hg38.

1.2 Read the Data into an Appropriate Data Structure and Apply Filtering

Read the downloaded fragment files into arrow files. Use lenient filtering based on the number of fragments and TSS enrichment (e.g., filterFrags > 500, filterTSS > 4), where: - minTSS - The minimum numeric transcription start site (TSS) enrichment score required for a cell to pass filtering for use in downstream analyses. Cells with a TSS enrichment score greater than or equal to minTSS will be retained. TSS enrichment score is a measurement of signal-to-background in ATAC-seq. - minFrags - The minimum number of mapped ATAC-seq fragments required per cell to pass filtering for use in downstream analyses. Cells containing greater than or equal to minFrags total fragments wll be retained. - addTileMat - A boolean value indicating whether to add a “Tile Matrix” to each ArrowFile. A Tile Matrix is a counts matrix that, instead of using peaks, uses a fixed-width sliding window of bins across the whole genome. This matrix can be used in many downstream ArchR operations. - addGeneScoreMat - A boolean value indicating whether to add a Gene-Score Matrix to each ArrowFile. A Gene-Score Matrix uses ATAC-seq signal proximal to the TSS to estimate gene activity.

Using tileSize = 500 for TileMatParams as more popular size.

# 
# TODO: do we need to handle seurat_object.rds somehow?

files <- c(
  'data/scbi_p2/hft_ctx_w21_dc1r3_r1_atac_fragments.tsv.gz', 
  'data/scbi_p2/hft_ctx_w21_dc2r2_r1_atac_fragments.tsv.gz',
  'data/scbi_p2/hft_ctx_w21_dc2r2_r2_atac_fragments.tsv.gz'
)

samples <- c(
  'hft_ctx_w21_dc1r3_r1', 
  'hft_ctx_w21_dc2r2_r1',
  'hft_ctx_w21_dc2r2_r2'
)

# https://www.archrproject.com/bookdown/per-cell-quality-control.html

ArrowFiles <- createArrowFiles(
  inputFiles = files,
  sampleNames = samples,
  minTSS = 5, # >=
  minFrags = 501, # >=
  addTileMat = TRUE,
  addGeneScoreMat = TRUE,
  TileMatParams = list(tileSize = 500)
)
## Using GeneAnnotation set by addArchRGenome(Hg38)!
## Using GeneAnnotation set by addArchRGenome(Hg38)!
## ArchR logging to : ArchRLogs/ArchR-createArrows-348ddd31ad8165-Date-2024-12-16_Time-00-44-20.log
## If there is an issue, please report to github with logFile!
## Cleaning Temporary Files
## 2024-12-16 00:44:20 : Batch Execution w/ safelapply!, 0 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-createArrows-348ddd31ad8165-Date-2024-12-16_Time-00-44-20.log

1.4.1. Integrate all samples into a single dataset

proj <- ArchRProject(
  ArrowFiles = ArrowFiles, 
  outputDirectory = "Project",
  copyArrows = TRUE # This is recommended so that if you modify the Arrow files you have an original copy for later usage.
)
## Using GeneAnnotation set by addArchRGenome(Hg38)!
## Using GeneAnnotation set by addArchRGenome(Hg38)!
## Validating Arrows...
## Getting SampleNames...
## 
## Copying ArrowFiles to Ouptut Directory! If you want to save disk space set copyArrows = FALSE
## 1 2 3 
## Getting Cell Metadata...
## 
## Merging Cell Metadata...
## Initializing ArchRProject...
## 
##                                                    / |
##                                                  /    \
##             .                                  /      |.
##             \\\                              /        |.
##               \\\                          /           `|.
##                 \\\                      /              |.
##                   \                    /                |\
##                   \\#####\           /                  ||
##                 ==###########>      /                   ||
##                  \\##==......\    /                     ||
##             ______ =       =|__ /__                     ||      \\\
##         ,--' ,----`-,__ ___/'  --,-`-===================##========>
##        \               '        ##_______ _____ ,--,__,=##,__   ///
##         ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
##         -,____,---'       \\####\\________________,--\\_##,/
##            ___      .______        ______  __    __  .______      
##           /   \     |   _  \      /      ||  |  |  | |   _  \     
##          /  ^  \    |  |_)  |    |  ,----'|  |__|  | |  |_)  |    
##         /  /_\  \   |      /     |  |     |   __   | |      /     
##        /  _____  \  |  |\  \\___ |  `----.|  |  |  | |  |\  \\___.
##       /__/     \__\ | _| `._____| \______||__|  |__| | _| `._____|
## 

1.3 Identify Doublets

Add doublet score

proj <- addDoubletScores(
    input = proj,
    k = 10, # Refers to how many cells near a "pseudo-doublet" to count.
    knnMethod = "UMAP", # Refers to the embedding to use for nearest neighbor search with doublet projection.
    LSIMethod = 1 # Use the 1st LSI embedding for pseudo-doublet creation
)
## ArchR logging to : ArchRLogs/ArchR-addDoubletScores-348ddd295d47ba-Date-2024-12-16_Time-00-44-30.log
## If there is an issue, please report to github with logFile!
## 2024-12-16 00:44:30 : Batch Execution w/ safelapply!, 0 mins elapsed.
## 2024-12-16 00:44:30 : hft_ctx_w21_dc2r2_r1 (1 of 3) :  Computing Doublet Statistics, 0 mins elapsed.
## hft_ctx_w21_dc2r2_r1 (1 of 3) : UMAP Projection R^2 = 0.99776
## hft_ctx_w21_dc2r2_r1 (1 of 3) : UMAP Projection R^2 = 0.99776
## 2024-12-16 00:46:22 : hft_ctx_w21_dc1r3_r1 (2 of 3) :  Computing Doublet Statistics, 1.857 mins elapsed.
## Filtering 1 dims correlated > 0.75 to log10(depth + 1)
## hft_ctx_w21_dc1r3_r1 (2 of 3) : UMAP Projection R^2 = 0.99792
## hft_ctx_w21_dc1r3_r1 (2 of 3) : UMAP Projection R^2 = 0.99792
## 2024-12-16 00:47:57 : hft_ctx_w21_dc2r2_r2 (3 of 3) :  Computing Doublet Statistics, 3.438 mins elapsed.
## Filtering 1 dims correlated > 0.75 to log10(depth + 1)
## hft_ctx_w21_dc2r2_r2 (3 of 3) : UMAP Projection R^2 = 0.99273
## hft_ctx_w21_dc2r2_r2 (3 of 3) : UMAP Projection R^2 = 0.99273
## ArchR logging successful to : ArchRLogs/ArchR-addDoubletScores-348ddd295d47ba-Date-2024-12-16_Time-00-44-30.log

Filter Out Doublets

# Set a seed for reproducibility
set.seed(1234)

# Filter cells with high DoubletEnrichment scores (Default settings)
proj <- filterDoublets(ArchRProj = proj)
## Filtering 327 cells from ArchRProject!
##  hft_ctx_w21_dc2r2_r1 : 139 of 3733 (3.7%)
##  hft_ctx_w21_dc1r3_r1 : 119 of 3455 (3.4%)
##  hft_ctx_w21_dc2r2_r2 : 69 of 2639 (2.6%)

Report findings

library(knitr)
data <- data.frame(
  Sample = c("hft_ctx_w21_dc2r2_r1", "hft_ctx_w21_dc1r3_r1", "hft_ctx_w21_dc2r2_r2"),
  Filtered_Cells = c(139, 119, 69),
  Total_Cells = c(3733, 3455, 2639),
  Percentage_Filtered = c(3.7, 3.4, 2.6)
)

kable(data, caption = "Filtered Cells by Sample", format = "markdown", col.names = c("Sample", "Filtered Cells", "Total Cells", "Percentage Filtered (%)"))
Filtered Cells by Sample
Sample Filtered Cells Total Cells Percentage Filtered (%)
hft_ctx_w21_dc2r2_r1 139 3733 3.7
hft_ctx_w21_dc1r3_r1 119 3455 3.4
hft_ctx_w21_dc2r2_r2 69 2639 2.6

1.4 Collect All Samples into a Joint Data Structure

Already done.

# Inspect cell metadata
cell_metadata <- getCellColData(proj)
head(cell_metadata)
## DataFrame with 6 rows and 15 columns
##                                                       Sample TSSEnrichment
##                                                  <character>     <numeric>
## hft_ctx_w21_dc2r2_r1#GGTTGAGCAGCTAATT-1 hft_ctx_w21_dc2r2_r1        16.770
## hft_ctx_w21_dc2r2_r1#TATGACTCACCATATG-1 hft_ctx_w21_dc2r2_r1        13.382
## hft_ctx_w21_dc2r2_r1#ATTAGTCCAATCCTAG-1 hft_ctx_w21_dc2r2_r1        14.003
## hft_ctx_w21_dc2r2_r1#GATTGGTTCGATTTAG-1 hft_ctx_w21_dc2r2_r1        15.894
## hft_ctx_w21_dc2r2_r1#TCAAGGAAGCCTCTGT-1 hft_ctx_w21_dc2r2_r1        18.798
## hft_ctx_w21_dc2r2_r1#GTACTAGGTTAATGAC-1 hft_ctx_w21_dc2r2_r1        19.140
##                                         ReadsInTSS ReadsInPromoter
##                                          <numeric>       <numeric>
## hft_ctx_w21_dc2r2_r1#GGTTGAGCAGCTAATT-1      29726           29236
## hft_ctx_w21_dc2r2_r1#TATGACTCACCATATG-1      36317           36120
## hft_ctx_w21_dc2r2_r1#ATTAGTCCAATCCTAG-1      43108           43218
## hft_ctx_w21_dc2r2_r1#GATTGGTTCGATTTAG-1      41769           40645
## hft_ctx_w21_dc2r2_r1#TCAAGGAAGCCTCTGT-1      44390           42464
## hft_ctx_w21_dc2r2_r1#GTACTAGGTTAATGAC-1      40848           39222
##                                         ReadsInBlacklist PromoterRatio
##                                                <numeric>     <numeric>
## hft_ctx_w21_dc2r2_r1#GGTTGAGCAGCTAATT-1             1037      0.165743
## hft_ctx_w21_dc2r2_r1#TATGACTCACCATATG-1              614      0.214014
## hft_ctx_w21_dc2r2_r1#ATTAGTCCAATCCTAG-1              675      0.259241
## hft_ctx_w21_dc2r2_r1#GATTGGTTCGATTTAG-1              618      0.248402
## hft_ctx_w21_dc2r2_r1#TCAAGGAAGCCTCTGT-1              581      0.269698
## hft_ctx_w21_dc2r2_r1#GTACTAGGTTAATGAC-1              579      0.252238
##                                            PassQC NucleosomeRatio nMultiFrags
##                                         <numeric>       <numeric>   <numeric>
## hft_ctx_w21_dc2r2_r1#GGTTGAGCAGCTAATT-1         1        1.728614       12778
## hft_ctx_w21_dc2r2_r1#TATGACTCACCATATG-1         1        0.775821        8914
## hft_ctx_w21_dc2r2_r1#ATTAGTCCAATCCTAG-1         1        0.968008        9589
## hft_ctx_w21_dc2r2_r1#GATTGGTTCGATTTAG-1         1        0.755267        8656
## hft_ctx_w21_dc2r2_r1#TCAAGGAAGCCTCTGT-1         1        1.040671       10974
## hft_ctx_w21_dc2r2_r1#GTACTAGGTTAATGAC-1         1        0.935282        9906
##                                         nMonoFrags    nFrags  nDiFrags
##                                          <numeric> <numeric> <numeric>
## hft_ctx_w21_dc2r2_r1#GGTTGAGCAGCTAATT-1      32323     88197     43096
## hft_ctx_w21_dc2r2_r1#TATGACTCACCATATG-1      47520     84387     27953
## hft_ctx_w21_dc2r2_r1#ATTAGTCCAATCCTAG-1      42355     83355     31411
## hft_ctx_w21_dc2r2_r1#GATTGGTTCGATTTAG-1      46610     81813     26547
## hft_ctx_w21_dc2r2_r1#TCAAGGAAGCCTCTGT-1      38578     78725     29173
## hft_ctx_w21_dc2r2_r1#GTACTAGGTTAATGAC-1      40174     77748     27668
##                                         DoubletScore DoubletEnrichment
##                                            <numeric>         <numeric>
## hft_ctx_w21_dc2r2_r1#GGTTGAGCAGCTAATT-1       0.0000           1.13333
## hft_ctx_w21_dc2r2_r1#TATGACTCACCATATG-1      50.4814           4.33333
## hft_ctx_w21_dc2r2_r1#ATTAGTCCAATCCTAG-1       0.0000           1.00000
## hft_ctx_w21_dc2r2_r1#GATTGGTTCGATTTAG-1      33.0662           3.60000
## hft_ctx_w21_dc2r2_r1#TCAAGGAAGCCTCTGT-1      50.4814           4.33333
## hft_ctx_w21_dc2r2_r1#GTACTAGGTTAATGAC-1      27.0897           3.33333
##                                         BlacklistRatio
##                                              <numeric>
## hft_ctx_w21_dc2r2_r1#GGTTGAGCAGCTAATT-1     0.00587888
## hft_ctx_w21_dc2r2_r1#TATGACTCACCATATG-1     0.00363800
## hft_ctx_w21_dc2r2_r1#ATTAGTCCAATCCTAG-1     0.00404895
## hft_ctx_w21_dc2r2_r1#GATTGGTTCGATTTAG-1     0.00377691
## hft_ctx_w21_dc2r2_r1#TCAAGGAAGCCTCTGT-1     0.00369006
## hft_ctx_w21_dc2r2_r1#GTACTAGGTTAATGAC-1     0.00372357

Apply filter

How many cells are included in your project?

n_cells <- nCells(proj)
cat("Cells included in project:", n_cells)
## Cells included in project: 9500

What is the median TSS value and the median number of fragments?

median_TSS <- median(cell_metadata$TSSEnrichment, na.rm = TRUE)
cat("the median TSS value:", median_TSS)
## the median TSS value: 19.89
median_frags <- median(cell_metadata$nFrags, na.rm = TRUE)
cat("the median number of fragments:", median_frags)
## the median number of fragments: 9193

What is the dimension of the peak set your dataset?

Irem Begüm Gündüz (03.12 12:19): Since you didn’t call the peaks yet, you can use the TileMatrix dimensions. Please refer to function https://www.archrproject.com/reference/getMatrixFromProject.html, don’t forget to change the parameter useMatrix=“TileMatrix”

matrix <- getMatrixFromProject(
  ArchRProj = proj,
  useMatrix="TileMatrix",
  binarize = TRUE
)
## ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-348ddd27475a81-Date-2024-12-16_Time-00-49-26.log
## If there is an issue, please report to github with logFile!
## 2024-12-16 00:50:21 : Organizing colData, 0.92 mins elapsed.
## 2024-12-16 00:50:21 : Organizing rowData, 0.92 mins elapsed.
## 2024-12-16 00:50:21 : Organizing rowRanges, 0.922 mins elapsed.
## 2024-12-16 00:50:21 : Organizing Assays (1 of 1), 0.922 mins elapsed.
## 2024-12-16 00:50:27 : Constructing SummarizedExperiment, 1.019 mins elapsed.
## 2024-12-16 00:50:28 : Finished Matrix Creation, 1.038 mins elapsed.
cat("dimension of the peak set of the dataset:", dim(matrix)[1], "x", dim(matrix)[2])
## dimension of the peak set of the dataset: 6062095 x 9500

1.5 Quality Control

Number of Cells per Sample

library(ggplot2)

# Get cell metadata
cell_metadata <- getCellColData(proj)

# Count cells per sample
cell_counts <- table(cell_metadata$Sample)
cat("Cells per each sample:", cell_counts)
## Cells per each sample: 3336 3594 2570
cell_df <- as.data.frame(cell_counts)
colnames(cell_df) <- c("Sample", "Count")


# Bar plot of cell counts per sample
p_cells <- ggplot(cell_df, aes(x = Sample, y = Count, fill = Sample)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Count), colour = "white", vjust = 10, size = 4)
  theme_bw() +
  labs(title = "Number of Cells per Sample", x = "Sample", y = "Number of Cells") +
  theme(legend.position = "none")
## List of 95
##  $ line                      :List of 6
##   ..$ colour       : chr "black"
##   ..$ size         : num 0.5
##   ..$ linetype     : num 1
##   ..$ lineend      : chr "butt"
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ rect                      :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : chr "black"
##   ..$ size         : num 0.5
##   ..$ linetype     : num 1
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ text                      :List of 11
##   ..$ family       : chr ""
##   ..$ face         : chr "plain"
##   ..$ colour       : chr "black"
##   ..$ size         : num 11
##   ..$ hjust        : num 0.5
##   ..$ vjust        : num 0.5
##   ..$ angle        : num 0
##   ..$ lineheight   : num 0.9
##   ..$ margin       : 'margin' num [1:4] 0points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ title                     : chr "Number of Cells per Sample"
##  $ aspect.ratio              : NULL
##  $ axis.title                : NULL
##  $ axis.title.x              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 2.75points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.x.top          :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 2.75points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.x.bottom       : NULL
##  $ axis.title.y              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : num 90
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 2.75points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.y.left         : NULL
##  $ axis.title.y.right        :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : num -90
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 0points 2.75points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text                 :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "grey30"
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x               :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 2.2points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x.top           :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 2.2points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x.bottom        : NULL
##  $ axis.text.y               :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 1
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 2.2points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.y.left          : NULL
##  $ axis.text.y.right         :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 0points 2.2points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.ticks                :List of 6
##   ..$ colour       : chr "grey20"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ axis.ticks.x              : NULL
##  $ axis.ticks.x.top          : NULL
##  $ axis.ticks.x.bottom       : NULL
##  $ axis.ticks.y              : NULL
##  $ axis.ticks.y.left         : NULL
##  $ axis.ticks.y.right        : NULL
##  $ axis.ticks.length         : 'simpleUnit' num 2.75points
##   ..- attr(*, "unit")= int 8
##  $ axis.ticks.length.x       : NULL
##  $ axis.ticks.length.x.top   : NULL
##  $ axis.ticks.length.x.bottom: NULL
##  $ axis.ticks.length.y       : NULL
##  $ axis.ticks.length.y.left  : NULL
##  $ axis.ticks.length.y.right : NULL
##  $ axis.line                 : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.line.x               : NULL
##  $ axis.line.x.top           : NULL
##  $ axis.line.x.bottom        : NULL
##  $ axis.line.y               : NULL
##  $ axis.line.y.left          : NULL
##  $ axis.line.y.right         : NULL
##  $ legend.background         :List of 5
##   ..$ fill         : NULL
##   ..$ colour       : logi NA
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ legend.margin             : 'margin' num [1:4] 5.5points 5.5points 5.5points 5.5points
##   ..- attr(*, "unit")= int 8
##  $ legend.spacing            : 'simpleUnit' num 11points
##   ..- attr(*, "unit")= int 8
##  $ legend.spacing.x          : NULL
##  $ legend.spacing.y          : NULL
##  $ legend.key                :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : logi NA
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ legend.key.size           : 'simpleUnit' num 1.2lines
##   ..- attr(*, "unit")= int 3
##  $ legend.key.height         : NULL
##  $ legend.key.width          : NULL
##  $ legend.text               :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.text.align         : NULL
##  $ legend.title              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.title.align        : NULL
##  $ legend.position           : chr "none"
##  $ legend.direction          : NULL
##  $ legend.justification      : chr "center"
##  $ legend.box                : NULL
##  $ legend.box.just           : NULL
##  $ legend.box.margin         : 'margin' num [1:4] 0cm 0cm 0cm 0cm
##   ..- attr(*, "unit")= int 1
##  $ legend.box.background     : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ legend.box.spacing        : 'simpleUnit' num 11points
##   ..- attr(*, "unit")= int 8
##  $ panel.background          :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : logi NA
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ panel.border              :List of 5
##   ..$ fill         : logi NA
##   ..$ colour       : chr "grey20"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ panel.spacing             : 'simpleUnit' num 5.5points
##   ..- attr(*, "unit")= int 8
##  $ panel.spacing.x           : NULL
##  $ panel.spacing.y           : NULL
##  $ panel.grid                :List of 6
##   ..$ colour       : chr "grey92"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ panel.grid.major          : NULL
##  $ panel.grid.minor          :List of 6
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.5
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ panel.grid.major.x        : NULL
##  $ panel.grid.major.y        : NULL
##  $ panel.grid.minor.x        : NULL
##  $ panel.grid.minor.y        : NULL
##  $ panel.ontop               : logi FALSE
##  $ plot.background           :List of 5
##   ..$ fill         : NULL
##   ..$ colour       : chr "white"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ plot.title                :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 1.2
##   ..$ hjust        : num 0
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 5.5points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.title.position       : chr "panel"
##  $ plot.subtitle             :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 5.5points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.caption              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : num 1
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 5.5points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.caption.position     : chr "panel"
##  $ plot.tag                  :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 1.2
##   ..$ hjust        : num 0.5
##   ..$ vjust        : num 0.5
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.tag.position         : chr "topleft"
##  $ plot.margin               : 'margin' num [1:4] 5.5points 5.5points 5.5points 5.5points
##   ..- attr(*, "unit")= int 8
##  $ strip.background          :List of 5
##   ..$ fill         : chr "grey85"
##   ..$ colour       : chr "grey20"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ strip.background.x        : NULL
##  $ strip.background.y        : NULL
##  $ strip.placement           : chr "inside"
##  $ strip.text                :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "grey10"
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 4.4points 4.4points 4.4points 4.4points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ strip.text.x              : NULL
##  $ strip.text.y              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : num -90
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ strip.switch.pad.grid     : 'simpleUnit' num 2.75points
##   ..- attr(*, "unit")= int 8
##  $ strip.switch.pad.wrap     : 'simpleUnit' num 2.75points
##   ..- attr(*, "unit")= int 8
##  $ strip.text.y.left         :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : num 90
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ x                         : chr "Sample"
##  $ y                         : chr "Number of Cells"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi TRUE
##  - attr(*, "validate")= logi TRUE
p_cells

Fragment Length Distribution for All Samples

p_frag_sizes <- plotFragmentSizes(ArchRProj = proj)
## ArchR logging to : ArchRLogs/ArchR-plotFragmentSizes-348ddd5391f190-Date-2024-12-16_Time-00-50-29.log
## If there is an issue, please report to github with logFile!
## ArchR logging successful to : ArchRLogs/ArchR-plotFragmentSizes-348ddd5391f190-Date-2024-12-16_Time-00-50-29.log
p_frag_sizes

samples <- unique(cell_metadata$Sample)

# Plot fragment sizes per sample by subsetting the project
for (s in samples) {
  # Subset cells from this sample
  proj_sub <- proj[getCellNames(proj)[getCellColData(proj)$Sample == s]]
  
  p_frag_sub <- plotFragmentSizes(ArchRProj = proj_sub) +
    ggtitle(paste("Fragment Sizes -", s))
  
  print(p_frag_sub)
}
## ArchR logging to : ArchRLogs/ArchR-plotFragmentSizes-348ddd4b43f54f-Date-2024-12-16_Time-00-50-51.log
## If there is an issue, please report to github with logFile!
## 2024-12-16 00:50:51 : hft_ctx_w21_dc2r2_r1 Computing FragmentSizes (1 of 1)!, 0.002 mins elapsed.
## 2024-12-16 00:51:04 : hft_ctx_w21_dc2r2_r1 Finished Computing FragmentSizes (1 of 1)!, 0.224 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-plotFragmentSizes-348ddd4b43f54f-Date-2024-12-16_Time-00-50-51.log
## ArchR logging to : ArchRLogs/ArchR-plotFragmentSizes-348ddd443c691d-Date-2024-12-16_Time-00-51-04.log
## If there is an issue, please report to github with logFile!
## 2024-12-16 00:51:04 : hft_ctx_w21_dc1r3_r1 Computing FragmentSizes (1 of 1)!, 0.002 mins elapsed.
## 2024-12-16 00:51:14 : hft_ctx_w21_dc1r3_r1 Finished Computing FragmentSizes (1 of 1)!, 0.158 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-plotFragmentSizes-348ddd443c691d-Date-2024-12-16_Time-00-51-04.log

## ArchR logging to : ArchRLogs/ArchR-plotFragmentSizes-348ddd67324dfc-Date-2024-12-16_Time-00-51-14.log
## If there is an issue, please report to github with logFile!
## 2024-12-16 00:51:14 : hft_ctx_w21_dc2r2_r2 Computing FragmentSizes (1 of 1)!, 0.002 mins elapsed.
## 2024-12-16 00:51:22 : hft_ctx_w21_dc2r2_r2 Finished Computing FragmentSizes (1 of 1)!, 0.142 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-plotFragmentSizes-348ddd67324dfc-Date-2024-12-16_Time-00-51-14.log

Distribution of TSS Enrichment Scores per Sample

tss_data <- data.frame(
  Sample = cell_metadata$Sample,
  TSSEnrichment = cell_metadata$TSSEnrichment
)

p_tss <- ggplot(tss_data, aes(x = Sample, y = TSSEnrichment, fill = Sample)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) +
  theme_bw() +
  labs(title = "TSS Enrichment by Sample", x = "Sample", y = "TSS Enrichment") +
  theme(legend.position = "none")

p_tss

Number of Fragments vs TSS Enrichment for Each Sample

library(ggplot2)

frag_data <- data.frame(
  Sample = cell_metadata$Sample,
  TSSEnrichment = cell_metadata$TSSEnrichment,
  nFrags = cell_metadata$nFrags
)

library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
p_combined <- ggplot(frag_data, aes(x = nFrags, y = TSSEnrichment)) +
  geom_point(alpha = 0.5) +
  facet_wrap(~ Sample, scales = "free") +
  theme_bw() +
  labs(title = "TSS Enrichment vs Number of Fragments by Sample",
       x = "Number of Fragments",
       y = "TSS Enrichment") +
  scale_x_continuous(labels = label_number_si())
## Warning: `label_number_si()` was deprecated in scales 1.2.0.
## Please use the `scale_cut` argument of `label_number()` instead.
p_combined

1.6. Filter the Dataset

After 1.5, we revisited and applied the following filter to remove some outliers.

The “standard” QC filters typically include a combination of a minimum fragment cutoff, a minimum TSS enrichment score, a low blacklist ratio, and removal of doublets. We already filtered the doublets with default values before. By imposing a minimum fragment cutoff, we ensure that each cell has sufficient coverage to reliably infer chromatin accessibility patterns. The TSS enrichment threshold removes cells with weak transcription start site signals, which are generally considered lower quality. Limiting the blacklist ratio reduces the influence of reads mapping to problematic genomic regions known to produce spurious signals. Finally, removing cells with extremely high fragment counts (beyond the 99th percentile) helps mitigate the effects of potential doublets or anomalously over-sequenced cells.

# Set thresholds
min_frags = 1000
min_tss = 6
max_frags <- quantile(proj$nFrags, 0.99)

# Subset the project cells based on these criteria
proj <- proj[
  proj$nFrags >= min_frags &
  proj$nFrags <= max_frags &
  proj$TSSEnrichment >= min_tss
]

Week 02

2. Dimensionality Reduction (5P)

2.1 Iterative LSI

Apply ArchR’s Iterative Latent Semantic Indexing (LSI).

The values chosen for the addIterativeLSI function parameters are based on common practices in scATAC-seq analyses, some of them (resolution) to reduce subtle batch effects.

proj <- addIterativeLSI(
  ArchRProj = proj,
  useMatrix = "TileMatrix", # Use TileMatrix for dimensionality reduction
  name = "IterativeLSI",
  clusterParams = list(
    resolution = c(0.2), # Resolution for clustering
    sampleCells = 10000, # Number of cells to subsample
    n.start = 10 # Number of random starts for clustering
  ),
  varFeatures = 15000, # Number of variable features
  force = TRUE
)
## Checking Inputs...
## ArchR logging to : ArchRLogs/ArchR-addIterativeLSI-348ddd2acd4f6f-Date-2024-12-16_Time-00-51-25.log
## If there is an issue, please report to github with logFile!
## 2024-12-16 00:51:25 : Computing Total Across All Features, 0.012 mins elapsed.
## 2024-12-16 00:51:27 : Computing Top Features, 0.033 mins elapsed.
## ###########
## 2024-12-16 00:51:29 : Running LSI (1 of 2) on Top Features, 0.065 mins elapsed.
## ###########
## 2024-12-16 00:51:29 : Creating Partial Matrix, 0.065 mins elapsed.
## 2024-12-16 00:51:38 : Computing LSI, 0.216 mins elapsed.
## 2024-12-16 00:52:06 : Identifying Clusters, 0.696 mins elapsed.
## 2024-12-16 00:52:22 : Identified 5 Clusters, 0.947 mins elapsed.
## 2024-12-16 00:52:22 : Saving LSI Iteration, 0.947 mins elapsed.
## 2024-12-16 00:52:36 : Creating Cluster Matrix on the total Group Features, 1.181 mins elapsed.
## 2024-12-16 00:52:47 : Computing Variable Features, 1.375 mins elapsed.
## ###########
## 2024-12-16 00:52:47 : Running LSI (2 of 2) on Variable Features, 1.378 mins elapsed.
## ###########
## 2024-12-16 00:52:47 : Creating Partial Matrix, 1.378 mins elapsed.
## 2024-12-16 00:52:56 : Computing LSI, 1.525 mins elapsed.
## 2024-12-16 00:53:10 : Finished Running IterativeLSI, 1.761 mins elapsed.

Justify why LSI is preferred over PCA for scATAC-seq data

LSI is specifically tailored for scATAC-seq data because it accounts for sparsity, binary nature, and sequencing depth differences. PCA alone does not perform these adjustments, making it less effective for this type of data. LSI ensures that the reduced dimensions capture meaningful biological variability, improving clustering and downstream analyses such as cell type annotation and trajectory inference.

2.2 UMAP with Sample Annotation and QC Metrics

Generate UMAP coordinates using ArchR (UMAP documentation)

# Add UMAP embedding based on an existing IterativeLSI
proj <- addUMAP(
  ArchRProj = proj,
  reducedDims = "IterativeLSI", 
  name = "UMAP_IterativeLSI",
  nNeighbors = 30, 
  minDist = 0.5
)
## 00:53:10 UMAP embedding parameters a = 0.583 b = 1.334
## 00:53:10 Read 9000 rows and found 30 numeric columns
## 00:53:10 Using Annoy for neighbor search, n_neighbors = 30
## 00:53:10 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:53:11 Writing NN index file to temp file /tmp/RtmpqKZWi6/file348ddd50abd47c
## 00:53:11 Searching Annoy index using 64 threads, search_k = 3000
## 00:53:11 Annoy recall = 100%
## 00:53:12 Commencing smooth kNN distance calibration using 64 threads with target n_neighbors = 30
## 00:53:13 Initializing from normalized Laplacian + noise (using irlba)
## 00:53:14 Commencing optimization for 500 epochs, with 398386 positive edges
## 00:53:24 Optimization finished
## 00:53:24 Creating temp model dir /tmp/RtmpqKZWi6/dir348ddd322dd7a
## 00:53:24 Creating dir /tmp/RtmpqKZWi6/dir348ddd322dd7a
## 00:53:25 Changing to /tmp/RtmpqKZWi6/dir348ddd322dd7a
## 00:53:25 Creating /home/mrahman/scATACseq/Project/Embeddings/Save-Uwot-UMAP-Params-IterativeLSI-348ddd737dbfa9-Date-2024-12-16_Time-00-53-24.tar

Plots

Color UMAP plots by sample, TSS enrichment, and number of fragments.

# Plot UMAP Colored by Sample
p_sample <- plotEmbedding(
  ArchRProj = proj,
  colorBy = "cellColData",
  name = "Sample",
  embedding = "UMAP_IterativeLSI",
  label.position = "right"
)
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-348ddd1ed614ea-Date-2024-12-16_Time-00-53-25.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-348ddd1ed614ea-Date-2024-12-16_Time-00-53-25.log
p_sample

# Plot UMAP Colored by TSS Enrichment
p_tss <- plotEmbedding(
  ArchRProj = proj,
  colorBy = "cellColData",
  name = "TSSEnrichment",
  embedding = "UMAP_IterativeLSI"
)
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-348ddd8c9e4c5-Date-2024-12-16_Time-00-53-27.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-348ddd8c9e4c5-Date-2024-12-16_Time-00-53-27.log
# Plot UMAP Colored by Number of Fragments
p_frags <- plotEmbedding(
  ArchRProj = proj,
  colorBy = "cellColData",
  name = "nFrags",
  embedding = "UMAP_IterativeLSI"
)
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-348ddd5e5b0e25-Date-2024-12-16_Time-00-53-28.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-348ddd5e5b0e25-Date-2024-12-16_Time-00-53-28.log
library(patchwork)

# Combine all plots together
combined_plot <- p_sample + p_tss + p_frags +
  plot_annotation(title = "UMAP Plots Colored by Different Metrics")

# View the combined plot
print(combined_plot)

2.3 Dealing with Batch Effects

After examining the UMAP plot where cells are colored by their respective sample, we see distinct clustering of cells by sample, which indicates batch effect. Also, the QC metrics (specially number of fragments) is not evenly distributed as well.

Correcting Batch Effects

suppressMessages(
  suppressWarnings(
    install.packages(
      "https://github.com/immunogenomics/harmony/archive/refs/tags/0.1.tar.gz", 
      repos=NULL,
      type="source"
      )
    )
  )

library(harmony)


# Correct batch effects using Harmony
proj <- addHarmony(
  ArchRProj = proj,
  reducedDims = "IterativeLSI",  # Use the LSI reduced dimensions
  name = "Harmony",
  groupBy = "Sample"  # Use sample as the batch variable
)
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony converged after 3 iterations
# Compute UMAP with Harmony-corrected dimensions
proj <- addUMAP(
  ArchRProj = proj,
  reducedDims = "Harmony",  # Use Harmony-corrected reduced dimensions
  name = "UMAP_Harmony",
  nNeighbors = 30,
  minDist = 0.5
)
## 00:54:10 UMAP embedding parameters a = 0.583 b = 1.334
## 00:54:10 Read 9000 rows and found 30 numeric columns
## 00:54:10 Using Annoy for neighbor search, n_neighbors = 30
## 00:54:10 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:54:11 Writing NN index file to temp file /tmp/RtmpqKZWi6/file348dddf3b7a3e
## 00:54:11 Searching Annoy index using 64 threads, search_k = 3000
## 00:54:11 Annoy recall = 100%
## 00:54:13 Commencing smooth kNN distance calibration using 64 threads with target n_neighbors = 30
## 00:54:14 Initializing from normalized Laplacian + noise (using irlba)
## 00:54:14 Commencing optimization for 500 epochs, with 402324 positive edges
## 00:54:25 Optimization finished
## 00:54:25 Creating temp model dir /tmp/RtmpqKZWi6/dir348ddd53abe301
## 00:54:25 Creating dir /tmp/RtmpqKZWi6/dir348ddd53abe301
## 00:54:26 Changing to /tmp/RtmpqKZWi6/dir348ddd53abe301
## 00:54:26 Creating /home/mrahman/scATACseq/Project/Embeddings/Save-Uwot-UMAP-Params-Harmony-348ddd7e326fe2-Date-2024-12-16_Time-00-54-25.tar
# UMAP colored by Sample (After Batch Correction)
p_sample_after <- plotEmbedding(
  ArchRProj = proj,
  colorBy = "cellColData",
  name = "Sample",
  embedding = "UMAP_Harmony"
) +
  labs(
    title = "UMAP by Sample (After Correction)",
    x = "UMAP Dimension 1",
    y = "UMAP Dimension 2"
  ) +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),          # Remove gridlines
    legend.position = "bottom",            # Position legend at bottom
    legend.box = "horizontal",             # Arrange legend items horizontally
    plot.title = element_text(size = 14),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-348ddd700d11fb-Date-2024-12-16_Time-00-54-26.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-348ddd700d11fb-Date-2024-12-16_Time-00-54-26.log
# UMAP colored by TSS Enrichment (After Batch Correction)
p_tss_after <- plotEmbedding(
  ArchRProj = proj,
  colorBy = "cellColData",
  name = "TSSEnrichment",
  embedding = "UMAP_Harmony"
) +
  labs(
    title = "UMAP by TSS Enrichment (After Correction)",
    x = "UMAP Dimension 1",
    y = "UMAP Dimension 2",
    color = "TSS Enrichment"
  ) +
  scale_color_viridis_c(option = "viridis") +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),          # Remove gridlines
    legend.position = "bottom",            # Position legend at bottom
    legend.box = "horizontal",             # Arrange legend items horizontally
    plot.title = element_text(size = 14),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-348ddd37e71253-Date-2024-12-16_Time-00-54-27.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-348ddd37e71253-Date-2024-12-16_Time-00-54-27.log
# UMAP colored by Number of Fragments (After Batch Correction)
p_frags_after <- plotEmbedding(
  ArchRProj = proj,
  colorBy = "cellColData",
  name = "nFrags",
  embedding = "UMAP_Harmony"
) +
  labs(
    title = "UMAP by Number of Fragments (After Correction)",
    x = "UMAP Dimension 1",
    y = "UMAP Dimension 2",
    color = "Number of Fragments"
  ) +
  scale_color_viridis_c(option = "plasma") +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),          # Remove gridlines
    legend.position = "bottom",            # Position legend at bottom
    legend.box = "horizontal",             # Arrange legend items horizontally
    plot.title = element_text(size = 14),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-348ddd2c7e22b9-Date-2024-12-16_Time-00-54-28.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-348ddd2c7e22b9-Date-2024-12-16_Time-00-54-28.log
library(ggpubr)

# Combine plots with ggpubr
combined_plot <- ggarrange(
  p_sample, p_sample_after,
  p_tss, p_tss_after,
  p_frags, p_frags_after,
  ncol = 2  # Arrange into 2 columns
)

print(combined_plot)
## $`1`

## 
## $`2`

## 
## $`3`

## 
## attr(,"class")
## [1] "list"      "ggarrange"

Observation

The batch correction step has effectively reduced sample-specific biases, enabling a biologically meaningful analysis of the data.

3. Clustering (3P)

# Apply Louvain clustering
proj <- addClusters(
  input = proj,
  reducedDims = "Harmony", 
  method = "Seurat",      
  name = "Clusters",
  resolution = 0.8
)
## ArchR logging to : ArchRLogs/ArchR-addClusters-348ddd7d537f63-Date-2024-12-16_Time-00-54-34.log
## If there is an issue, please report to github with logFile!
## 2024-12-16 00:54:34 : Running Seurats FindClusters (Stuart et al. Cell 2019), 0.001 mins elapsed.
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 9000
## Number of edges: 369828
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8623
## Number of communities: 16
## Elapsed time: 0 seconds
## 2024-12-16 00:54:48 : Testing Biased Clusters, 0.234 mins elapsed.
## 2024-12-16 00:54:48 : Testing Outlier Clusters, 0.235 mins elapsed.
## 2024-12-16 00:54:48 : Assigning Cluster Names to 16 Clusters, 0.235 mins elapsed.
## 2024-12-16 00:54:48 : Finished addClusters, 0.236 mins elapsed.
# Plot UMAP colored by clusters
p_clusters <- plotEmbedding(
  ArchRProj = proj,
  colorBy = "cellColData",
  name = "Clusters",
  embedding = "UMAP_Harmony"
) +
  labs(
    title = "UMAP Colored by Clusters",
    x = "UMAP Dimension 1",
    y = "UMAP Dimension 2"
  ) +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),          # Remove gridlines
    legend.position = "bottom",            # Position legend at bottom
    legend.box = "horizontal",             # Arrange legend items horizontally
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-348ddd733992a2-Date-2024-12-16_Time-00-54-48.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1 
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-348ddd733992a2-Date-2024-12-16_Time-00-54-48.log
# View the plot
print(p_clusters)

# Count cells in each cluster
cluster_counts <- table(getCellColData(proj, "Clusters"))

# Display cluster counts
print(cluster_counts)
## 
##   C1  C10  C11  C12  C13  C14  C15  C16   C2   C3   C4   C5   C6   C7   C8   C9 
##   26  586  186  499  930 1175 1058  845   48  673  572  282  225 1045  725  125
# Create a contingency table of clusters and samples
sample_cluster_table <- table(
  Cluster = getCellColData(proj, "Clusters"),
  Sample = getCellColData(proj, "Sample")
)

# Convert to proportions
sample_cluster_proportions <- prop.table(sample_cluster_table, margin = 1) * 100  # Percentage by cluster

# Display proportions
print(sample_cluster_proportions)
##                 Sample.Sample
## Cluster.Clusters hft_ctx_w21_dc1r3_r1 hft_ctx_w21_dc2r2_r1 hft_ctx_w21_dc2r2_r2
##              C1              23.07692             42.30769             34.61538
##              C10             30.03413             40.27304             29.69283
##              C11             32.79570             43.54839             23.65591
##              C12             25.45090             46.09218             28.45691
##              C13             33.44086             36.23656             30.32258
##              C14             26.72340             42.80851             30.46809
##              C15             37.80718             37.61815             24.57467
##              C16             29.11243             42.48521             28.40237
##              C2              20.83333             54.16667             25.00000
##              C3              32.98663             42.34770             24.66568
##              C4              36.36364             37.58741             26.04895
##              C5              30.14184             43.97163             25.88652
##              C6              47.55556             17.33333             35.11111
##              C7              50.81340             30.14354             19.04306
##              C8              43.31034             32.82759             23.86207
##              C9              52.00000             29.60000             18.40000
# Save as a dataframe for plotting
sample_cluster_df <- as.data.frame(sample_cluster_proportions)

4. Peaks (8P)

4.1 Peak Calling

ArchR uses MACS - Model-based Analysis of ChIP-Seq (MACS) method used for identifying transcript factor binding sites. MACS captures the influence of genome complexity to evaluate the significance of enriched ChIP regions and MACS improves the spatial resolution of binding sites through combining the information of both sequencing tag position and orientation reference

Install Macs3:

pip install macs3
## Requirement already satisfied: macs3 in /home/mrahman/miniforge3/envs/single-cell/lib/python3.11/site-packages (3.0.2)
## Requirement already satisfied: numpy<2.0.0,>=1.25 in /home/mrahman/miniforge3/envs/single-cell/lib/python3.11/site-packages (from macs3) (1.26.4)
## Requirement already satisfied: scipy>=1.12 in /home/mrahman/miniforge3/envs/single-cell/lib/python3.11/site-packages (from macs3) (1.14.1)
## Requirement already satisfied: hmmlearn>=0.3.2 in /home/mrahman/miniforge3/envs/single-cell/lib/python3.11/site-packages (from macs3) (0.3.3)
## Requirement already satisfied: scikit-learn>=1.3 in /home/mrahman/miniforge3/envs/single-cell/lib/python3.11/site-packages (from macs3) (1.6.0)
## Requirement already satisfied: cykhash<3.0,>=2.0 in /home/mrahman/miniforge3/envs/single-cell/lib/python3.11/site-packages (from macs3) (2.0.1)
## Requirement already satisfied: joblib>=1.2.0 in /home/mrahman/miniforge3/envs/single-cell/lib/python3.11/site-packages (from scikit-learn>=1.3->macs3) (1.4.2)
## Requirement already satisfied: threadpoolctl>=3.1.0 in /home/mrahman/miniforge3/envs/single-cell/lib/python3.11/site-packages (from scikit-learn>=1.3->macs3) (3.5.0)

Need to reopen session (or terminal) after this command

# Add group coverage for peak calling
proj <- addGroupCoverages(
  ArchRProj = proj,
  groupBy = "Clusters"  # Group by clusters
)
## ArchR logging to : ArchRLogs/ArchR-addGroupCoverages-348ddd328b8e0e-Date-2024-12-16_Time-00-54-51.log
## If there is an issue, please report to github with logFile!
## C1 (1 of 16) : CellGroups N = 2
## C2 (2 of 16) : CellGroups N = 2
## C3 (3 of 16) : CellGroups N = 3
## C4 (4 of 16) : CellGroups N = 3
## C5 (5 of 16) : CellGroups N = 3
## C6 (6 of 16) : CellGroups N = 2
## C7 (7 of 16) : CellGroups N = 3
## C8 (8 of 16) : CellGroups N = 3
## C9 (9 of 16) : CellGroups N = 2
## C10 (10 of 16) : CellGroups N = 3
## C11 (11 of 16) : CellGroups N = 3
## C12 (12 of 16) : CellGroups N = 3
## C13 (13 of 16) : CellGroups N = 3
## C14 (14 of 16) : CellGroups N = 3
## C15 (15 of 16) : CellGroups N = 3
## C16 (16 of 16) : CellGroups N = 3
## 2024-12-16 00:54:53 : Creating Coverage Files!, 0.034 mins elapsed.
## 2024-12-16 00:54:53 : Batch Execution w/ safelapply!, 0.034 mins elapsed.
## 2024-12-16 00:55:47 : Adding Kmer Bias to Coverage Files!, 0.932 mins elapsed.
## Completed Kmer Bias Calculation
## Adding Kmer Bias (1 of 44)
## Adding Kmer Bias (2 of 44)
## Adding Kmer Bias (3 of 44)
## Adding Kmer Bias (4 of 44)
## Adding Kmer Bias (5 of 44)
## Adding Kmer Bias (6 of 44)
## Adding Kmer Bias (7 of 44)
## Adding Kmer Bias (8 of 44)
## Adding Kmer Bias (9 of 44)
## Adding Kmer Bias (10 of 44)
## Adding Kmer Bias (11 of 44)
## Adding Kmer Bias (12 of 44)
## Adding Kmer Bias (13 of 44)
## Adding Kmer Bias (14 of 44)
## Adding Kmer Bias (15 of 44)
## Adding Kmer Bias (16 of 44)
## Adding Kmer Bias (17 of 44)
## Adding Kmer Bias (18 of 44)
## Adding Kmer Bias (19 of 44)
## Adding Kmer Bias (20 of 44)
## Adding Kmer Bias (21 of 44)
## Adding Kmer Bias (22 of 44)
## Adding Kmer Bias (23 of 44)
## Adding Kmer Bias (24 of 44)
## Adding Kmer Bias (25 of 44)
## Adding Kmer Bias (26 of 44)
## Adding Kmer Bias (27 of 44)
## Adding Kmer Bias (28 of 44)
## Adding Kmer Bias (29 of 44)
## Adding Kmer Bias (30 of 44)
## Adding Kmer Bias (31 of 44)
## Adding Kmer Bias (32 of 44)
## Adding Kmer Bias (33 of 44)
## Adding Kmer Bias (34 of 44)
## Adding Kmer Bias (35 of 44)
## Adding Kmer Bias (36 of 44)
## Adding Kmer Bias (37 of 44)
## Adding Kmer Bias (38 of 44)
## Adding Kmer Bias (39 of 44)
## Adding Kmer Bias (40 of 44)
## Adding Kmer Bias (41 of 44)
## Adding Kmer Bias (42 of 44)
## Adding Kmer Bias (43 of 44)
## Adding Kmer Bias (44 of 44)
## 2024-12-16 00:57:12 : Finished Creation of Coverage Files!, 2.359 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-addGroupCoverages-348ddd328b8e0e-Date-2024-12-16_Time-00-54-51.log
macs3_path <- system("which macs3", intern = TRUE)

# Perform peak calling for each cluster
proj <- addReproduciblePeakSet(
  ArchRProj = proj,
  groupBy = "Clusters",
  pathToMacs2 = macs3_path #macs3_path
)
## ArchR logging to : ArchRLogs/ArchR-addReproduciblePeakSet-348ddd68f7ac2b-Date-2024-12-16_Time-00-57-13.log
## If there is an issue, please report to github with logFile!
## Calling Peaks with Macs2
## 2024-12-16 00:57:13 : Peak Calling Parameters!, 0.002 mins elapsed.
##     Group nCells nCellsUsed nReplicates nMin nMax maxPeaks
## C1     C1     26         26           2   15   22    13000
## C2     C2     48         42           2   24   29    21000
## C3     C3    673        673           3  166  285   150000
## C4     C4    572        572           3  149  215   150000
## C5     C5    282        282           3   73  124   141000
## C6     C6    225        186           2   79  107    93000
## C7     C7   1045       1014           3  199  500   150000
## C8     C8    725        725           3  173  314   150000
## C9     C9    125        125           2   60   65    62500
## C10   C10    586        586           3  174  236   150000
## C11   C11    186        186           3   44   81    93000
## C12   C12    499        499           3  127  230   150000
## C13   C13    930        930           3  282  337   150000
## C14   C14   1175       1172           3  314  500   150000
## C15   C15   1058       1058           3  260  400   150000
## C16   C16    845        845           3  240  359   150000
## 2024-12-16 00:57:13 : Batching Peak Calls!, 0.003 mins elapsed.
## 2024-12-16 00:57:13 : Batch Execution w/ safelapply!, 0 mins elapsed.
## 2024-12-16 00:58:53 : Identifying Reproducible Peaks!, 1.67 mins elapsed.
## 2024-12-16 00:59:04 : Creating Union Peak Set!, 1.859 mins elapsed.
## Converged after 8 iterations!
## Plotting Ggplot!
## 2024-12-16 00:59:16 : Finished Creating Union Peak Set (280504)!, 2.051 mins elapsed.
# Add a peak matrix to the project
proj <- addPeakMatrix(proj)
## ArchR logging to : ArchRLogs/ArchR-addPeakMatrix-348ddd5d58dd7d-Date-2024-12-16_Time-00-59-16.log
## If there is an issue, please report to github with logFile!
## 2024-12-16 00:59:16 : Batch Execution w/ safelapply!, 0 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-addPeakMatrix-348ddd5d58dd7d-Date-2024-12-16_Time-00-59-16.log

Cluster Marker Peaks

Due to not enough hardware resource, I’m not able to process the follwing code:

# Perform differential peak analysis to find cluster-specific peaks
marker_peaks <- getMarkerFeatures(
  ArchRProj = proj,
  useMatrix = "PeakMatrix",
  groupBy = "Clusters",
  bias = c("TSSEnrichment", "nFrags"),  # Account for biases
  testMethod = "wilcoxon"  # Statistical test
)
## ArchR logging to : ArchRLogs/ArchR-getMarkerFeatures-348ddd4b221e68-Date-2024-12-16_Time-01-00-33.log
## If there is an issue, please report to github with logFile!
## MatrixClass = Sparse.Integer.Matrix
## 2024-12-16 01:00:34 : Matching Known Biases, 0.006 mins elapsed.
## ###########
## 2024-12-16 01:01:21 : Completed Pairwise Tests, 0.791 mins elapsed.
## ###########
## ArchR logging successful to : ArchRLogs/ArchR-getMarkerFeatures-348ddd4b221e68-Date-2024-12-16_Time-01-00-33.log
# Filter marker peaks based on cutoffs
marker_list <- getMarkers(marker_peaks, cutOff = "FDR <= 0.01 & Log2FC >= 1.5")

Heatmap of Accessibility in Marker Peaks

# Plot heatmap of marker peaks
heatmap_marker_peaks <- plotMarkerHeatmap(
  seMarker = marker_peaks,
  cutOff = "FDR <= 0.01 & Log2FC >= 1.5",
  transpose = TRUE
)
## ArchR logging to : ArchRLogs/ArchR-plotMarkerHeatmap-348ddd6101b10c-Date-2024-12-16_Time-01-01-29.log
## If there is an issue, please report to github with logFile!
## Identified 65734 markers!
##   [1] "chr1:940184-940684"        "chr1:944615-945115"       
##   [3] "chr1:984068-984568"        "chr1:1001149-1001649"     
##   [5] "chr1:1024909-1025409"      "chr1:1094954-1095454"     
##   [7] "chr1:1136891-1137391"      "chr1:1158175-1158675"     
##   [9] "chr1:1298805-1299305"      "chr1:1305501-1306001"     
##  [11] "chr1:1430352-1430852"      "chr1:1780030-1780530"     
##  [13] "chr1:1788112-1788612"      "chr1:1919763-1920263"     
##  [15] "chr1:2014653-2015153"      "chr1:152048033-152048533" 
##  [17] "chr1:2272230-2272730"      "chr1:2469167-2469667"     
##  [19] "chr1:2540631-2541131"      "chr1:2987786-2988286"     
##  [21] "chr1:3135253-3135753"      "chr1:3350454-3350954"     
##  [23] "chr1:6507864-6508364"      "chr1:6510935-6511435"     
##  [25] "chr1:6519103-6519603"      "chr1:6870035-6870535"     
##  [27] "chr1:8462374-8462874"      "chr1:8629424-8629924"     
##  [29] "chr1:8727834-8728334"      "chr1:2060238-2060738"     
##  [31] "chr1:2138074-2138574"      "chr1:2230089-2230589"     
##  [33] "chr1:2248166-2248666"      "chr1:2248755-2249255"     
##  [35] "chr1:2432248-2432748"      "chr1:2493507-2494007"     
##  [37] "chr1:2526707-2527207"      "chr1:2527256-2527756"     
##  [39] "chr1:2527759-2528259"      "chr1:2530225-2530725"     
##  [41] "chr1:2530948-2531448"      "chr1:2133010-2133510"     
##  [43] "chr1:2257242-2257742"      "chr1:3366734-3367234"     
##  [45] "chr1:4680979-4681479"      "chr1:5477645-5478145"     
##  [47] "chr1:6016161-6016661"      "chr1:7783809-7784309"     
##  [49] "chr1:8438865-8439365"      "chr1:8941796-8942296"     
##  [51] "chr1:9324493-9324993"      "chr1:9352216-9352716"     
##  [53] "chr1:9388221-9388721"      "chr1:9532705-9533205"     
##  [55] "chr1:3226818-3227318"      "chr1:3332010-3332510"     
##  [57] "chr1:3712994-3713494"      "chr1:4173090-4173590"     
##  [59] "chr1:4179537-4180037"      "chr1:4253818-4254318"     
##  [61] "chr1:4266629-4267129"      "chr1:4345127-4345627"     
##  [63] "chr1:4395632-4396132"      "chr1:4523493-4523993"     
##  [65] "chr1:2025617-2026117"      "chr1:2243111-2243611"     
##  [67] "chr1:2266961-2267461"      "chr1:2268523-2269023"     
##  [69] "chr1:2318886-2319386"      "chr1:2330422-2330922"     
##  [71] "chr1:2345615-2346115"      "chr1:2349831-2350331"     
##  [73] "chr1:2350437-2350937"      "chr1:2375014-2375514"     
##  [75] "chr1:3109139-3109639"      "chr1:3215536-3216036"     
##  [77] "chr1:3221186-3221686"      "chr1:3222014-3222514"     
##  [79] "chr1:4070504-4071004"      "chr1:221568126-221568626" 
##  [81] "chr11:134004331-134004831" "chr12:58750424-58750924"  
##  [83] "chr12:119076557-119077057" "chr16:14305332-14305832"  
##  [85] "chr18:30552257-30552757"   "chr19:18280235-18280735"  
##  [87] "chr19:31074982-31075482"   "chr19:45178102-45178602"  
##  [89] "chr2:64265332-64265832"    "chr2:220212277-220212777" 
##  [91] "chr20:42862398-42862898"   "chr3:144404997-144405497" 
##  [93] "chr5:81851607-81852107"    "chr5:101838079-101838579" 
##  [95] "chr1:2042988-2043488"      "chr1:2128241-2128741"     
##  [97] "chr1:2259563-2260063"      "chr1:2349303-2349803"     
##  [99] "chr1:2927868-2928368"      "chr1:3448826-3449326"     
## [101] "chr1:3531270-3531770"      "chr1:4421865-4422365"     
## [103] "chr1:4430915-4431415"      "chr1:4431665-4432165"     
## [105] "chr1:4485298-4485798"      "chr1:4532572-4533072"     
## [107] "chr1:5000837-5001337"      "chr1:5029253-5029753"     
## [109] "chr1:5839778-5840278"      "chr1:11700424-11700924"   
## [111] "chr1:14496490-14496990"    "chr1:17263869-17264369"   
## [113] "chr1:17777232-17777732"    "chr1:19521350-19521850"   
## [115] "chr1:19563497-19563997"    "chr1:19878189-19878689"   
## [117] "chr1:21198822-21199322"    "chr1:29346461-29346961"   
## [119] "chr1:32043958-32044458"    "chr1:34721827-34722327"   
## [121] "chr1:35532584-35533084"    "chr1:36693149-36693649"   
## [123] "chr1:36706473-36706973"    "chr1:36714265-36714765"   
## [125] "chr1:3944651-3945151"      "chr1:5771036-5771536"     
## [127] "chr1:6076969-6077469"      "chr1:7716447-7716947"     
## [129] "chr1:8597938-8598438"      "chr1:8660053-8660553"     
## [131] "chr1:8679344-8679844"      "chr1:10510462-10510962"   
## [133] "chr1:11182334-11182834"    "chr1:11461467-11461967"   
## [135] "chr1:11507285-11507785"    "chr1:11715111-11715611"   
## [137] "chr1:12121702-12122202"    "chr1:15327162-15327662"   
## [139] "chr1:18191354-18191854"    "chr1:3314938-3315438"     
## [141] "chr1:5747280-5747780"      "chr1:6460107-6460607"     
## [143] "chr1:9347376-9347876"      "chr1:20229163-20229663"   
## [145] "chr1:22548036-22548536"    "chr1:22626169-22626669"   
## [147] "chr1:25566051-25566551"    "chr1:29945222-29945722"   
## [149] "chr1:34246300-34246800"    "chr1:37307903-37308403"   
## [151] "chr1:41383334-41383834"    "chr1:49405550-49406050"   
## [153] "chr1:53727628-53728128"    "chr1:55065549-55066049"   
## [155] "chr1:2465353-2465853"      "chr1:2480947-2481447"     
## [157] "chr1:2969682-2970182"      "chr1:3577997-3578497"     
## [159] "chr1:3892674-3893174"      "chr1:4071504-4072004"     
## [161] "chr1:4072110-4072610"      "chr1:4171417-4171917"     
## [163] "chr1:4879962-4880462"      "chr1:5139878-5140378"     
## [165] "chr1:5185406-5185906"      "chr1:5287584-5288084"     
## [167] "chr1:3212919-3213419"      "chr1:4321609-4322109"     
## [169] "chr1:5220688-5221188"      "chr1:5296795-5297295"     
## [171] "chr1:6080397-6080897"      "chr1:8312288-8312788"     
## [173] "chr1:10161073-10161573"    "chr1:11153133-11153633"   
## [175] "chr1:11157736-11158236"    "chr1:11199222-11199722"   
## [177] "chr1:14945376-14945876"    "chr1:14955871-14956371"   
## [179] "chr1:18001673-18002173"    "chr1:18279572-18280072"   
## [181] "chr1:18803034-18803534"    "chr1:3283908-3284408"     
## [183] "chr1:58442085-58442585"    "chr1:97449728-97450228"   
## [185] "chr1:186993984-186994484"  "chr10:36115956-36116456"  
## [187] "chr10:53009947-53010447"   "chr10:63255888-63256388"  
## [189] "chr10:78941836-78942336"   "chr10:83555107-83555607"  
## [191] "chr10:106555351-106555851" "chr10:109991216-109991716"
## [193] "chr10:117554433-117554933" "chr10:129892567-129893067"
## [195] "chr11:19952870-19953370"   "chr11:44819349-44819849"
## Adding Annotations..
## Preparing Main Heatmap..
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## ArchR logging successful to : ArchRLogs/ArchR-plotMarkerHeatmap-348ddd6101b10c-Date-2024-12-16_Time-01-01-29.log
# Save heatmap
plotPDF(
  heatmap_marker_peaks,
  name = "Marker_Peaks_Heatmap.pdf",
  ArchRProj = proj,
  width = 8,
  height = 6
)
## Plotting ComplexHeatmap!

Plot Accessibility Around Selected Genes

# Define genes of interest
genes_of_interest <- c("TOP2A", "MKI67", "AURKA", "SATB2", "SLC12A7")

# Plot browser tracks for the selected genes
browser_tracks <- plotBrowserTrack(
  ArchRProj = proj,
  groupBy = "Clusters",
  geneSymbol = genes_of_interest,
  upstream = 50000,  # Upstream and downstream range
  downstream = 50000
)
## ArchR logging to : ArchRLogs/ArchR-plotBrowserTrack-348ddd1c25b1c6-Date-2024-12-16_Time-01-01-43.log
## If there is an issue, please report to github with logFile!
## 2024-12-16 01:01:43 : Validating Region, 0.002 mins elapsed.
## GRanges object with 5 ranges and 2 metadata columns:
##       seqnames              ranges strand |     gene_id      symbol
##          <Rle>           <IRanges>  <Rle> | <character> <character>
##   [1]    chr17   40388516-40417950      - |        7153       TOP2A
##   [2]    chr10 128096659-128126385      - |        4288       MKI67
##   [3]    chr20   56369389-56392337      - |        6790       AURKA
##   [4]     chr2 199269500-199471266      - |       23314       SATB2
##   [5]     chr5     1050376-1112035      - |       10723     SLC12A7
##   -------
##   seqinfo: 24 sequences from hg38 genome
## 2024-12-16 01:01:43 : Adding Bulk Tracks (1 of 5), 0.003 mins elapsed.
## 2024-12-16 01:01:44 : Adding Feature Tracks (1 of 5), 0.026 mins elapsed.
## 2024-12-16 01:01:45 : Adding Gene Tracks (1 of 5), 0.027 mins elapsed.
## 2024-12-16 01:01:45 : Plotting, 0.034 mins elapsed.
## 2024-12-16 01:01:47 : Adding Bulk Tracks (2 of 5), 0.065 mins elapsed.
## 2024-12-16 01:01:48 : Adding Feature Tracks (2 of 5), 0.088 mins elapsed.
## 2024-12-16 01:01:48 : Adding Gene Tracks (2 of 5), 0.089 mins elapsed.
## 2024-12-16 01:01:48 : Plotting, 0.094 mins elapsed.
## 2024-12-16 01:01:50 : Adding Bulk Tracks (3 of 5), 0.122 mins elapsed.
## 2024-12-16 01:01:51 : Adding Feature Tracks (3 of 5), 0.143 mins elapsed.
## 2024-12-16 01:01:52 : Adding Gene Tracks (3 of 5), 0.144 mins elapsed.
## 2024-12-16 01:01:52 : Plotting, 0.149 mins elapsed.
## 2024-12-16 01:01:53 : Adding Bulk Tracks (4 of 5), 0.176 mins elapsed.
## 2024-12-16 01:01:55 : Adding Feature Tracks (4 of 5), 0.201 mins elapsed.
## 2024-12-16 01:01:55 : Adding Gene Tracks (4 of 5), 0.202 mins elapsed.
## 2024-12-16 01:01:55 : Plotting, 0.206 mins elapsed.
## 2024-12-16 01:01:57 : Adding Bulk Tracks (5 of 5), 0.24 mins elapsed.
## 2024-12-16 01:01:59 : Adding Feature Tracks (5 of 5), 0.263 mins elapsed.
## 2024-12-16 01:01:59 : Adding Gene Tracks (5 of 5), 0.264 mins elapsed.
## 2024-12-16 01:01:59 : Plotting, 0.268 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-plotBrowserTrack-348ddd1c25b1c6-Date-2024-12-16_Time-01-01-43.log
# Save browser tracks
plotPDF(
  browser_tracks,
  name = "Browser_Tracks_Selected_Genes.pdf",
  ArchRProj = proj,
  width = 10,
  height = 8
)
## Plotting Gtable!
## NULL
## Plotting Gtable!
## NULL
## Plotting Gtable!
## NULL
## Plotting Gtable!
## NULL
## Plotting Gtable!
## NULL

Week 3

5. Gene Activity (3P)

5.1 Compute Gene Activity Scores Using Chromatin Accessibility

set.seed(1234)  # For reproducibility

# Check if gene activity is already present
if(!"GeneScoreMatrix" %in% names(proj@assays)){
  # Compute gene activity scores
  proj <- addGeneScoreMatrix(
    ArchRProj = proj,
    geneModel = "hg38",  
    useMatrix = "GeneScoreMatrix"
  )
  proj
} else {
  message("Gene activity scores are already computed.")
}
## Error in h(simpleError(msg, call)): error in evaluating the argument 'table' in selecting a method for function '%in%': no slot of name "assays" for this object of class "ArchRProject"

5.2 Identify Marker Genes

# Identify marker genes
markers <- getMarkerFeatures(
  ArchRProj = proj,
  groupBy = "Clusters",  # Replace with your actual cluster column
  useMatrix = "GeneScoreMatrix",
  bias = c("TSSEnrichment", "log10(nFrags)"),
  testMethod = "wilcoxon"
)
## ArchR logging to : ArchRLogs/ArchR-getMarkerFeatures-348ddd12ffb52-Date-2024-12-16_Time-01-02-03.log
## If there is an issue, please report to github with logFile!
## MatrixClass = Sparse.Double.Matrix
## 2024-12-16 01:02:04 : Matching Known Biases, 0.002 mins elapsed.
## ###########
## 2024-12-16 01:02:53 : Completed Pairwise Tests, 0.83 mins elapsed.
## ###########
## ArchR logging successful to : ArchRLogs/ArchR-getMarkerFeatures-348ddd12ffb52-Date-2024-12-16_Time-01-02-03.log
## Get top markers per cluster
markerList <- getMarkers(markers, cutOff = "FDR < 0.05 & Log2FC > 1")

# View marker genes for a specific cluster (e.g., Cluster 1)
markerList[["Cluster1"]]
## NULL

5.3 Using MAGIC

# Assuming you want the top 5 markers from the first cluster
topMarkers <- markerList[[1]] %>% 
  as.data.frame() %>% 
  rownames() %>% 
  head(5)

print(topMarkers)
## character(0)

Plot UMAPs Without MAGIC Smoothing

# Without MAGIC
plotMarkers(
  ArchRProj = proj, 
  geneList = topMarkers, 
  groupBy = "Clusters", 
  imputeWeights = NULL,  # No smoothing
  embedding = "UMAP"
)
## Error in plotMarkers(ArchRProj = proj, geneList = topMarkers, groupBy = "Clusters", : unused arguments (ArchRProj = proj, geneList = topMarkers, groupBy = "Clusters", imputeWeights = NULL, embedding = "UMAP")

Apply MAGIC Smoothing

# Add MAGIC Imputation
proj <- addImputeWeights(
  ArchRProj = proj,
  reducedDims = "UMAP",
  useMatrix = "GeneScoreMatrix",
  k = 30,  # Number of nearest neighbors
  knnMethod = "umap",
  smooth = 2
)
## Error in addImputeWeights(ArchRProj = proj, reducedDims = "UMAP", useMatrix = "GeneScoreMatrix", : unused arguments (useMatrix = "GeneScoreMatrix", knnMethod = "umap", smooth = 2)

Plot UMAPs With MAGIC Smoothing

# With MAGIC
plotMarkers(
  ArchRProj = proj, 
  geneList = topMarkers, 
  groupBy = "Clusters", 
  imputeWeights = getImputeWeights(proj),  # Apply smoothing
  embedding = "UMAP"
)
## Error in plotMarkers(ArchRProj = proj, geneList = topMarkers, groupBy = "Clusters", : unused arguments (ArchRProj = proj, geneList = topMarkers, groupBy = "Clusters", imputeWeights = getImputeWeights(proj), embedding = "UMAP")

6. Transcription Factor Motif Activity (8P)

6.1 Compute TF Motif Activity

Which annotation did you choose?

We selected the JASPAR2020 database for TF motif annotations.

How was the annotation obtained?

The JASPAR2020 database is a widely recognized repository of curated, non-redundant TF binding profiles. It was obtained using the getMatrixSet function from the JASPAR2020 package, which provides access to high-quality, manually curated TF motifs.

if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")

BiocManager::install(c("JASPAR2020", "motifmatchr"))
## Bioconductor version 3.12 (BiocManager 1.30.18), R 4.0.5 (2021-03-31)
## Warning: package(s) not installed when version(s) same as current; use `force = TRUE` to
##   re-install: 'motifmatchr'
## Installing package(s) 'JASPAR2020'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ... done
## Old packages: 'abind', 'AnnotationHub', 'askpass', 'backports', 'batchelor',
##   'BH', 'BiocGenerics', 'BiocManager', 'bit', 'bit64', 'bitops', 'blob',
##   'boot', 'brew', 'brio', 'broom', 'bslib', 'cachem', 'Cairo', 'callr', 'car',
##   'caret', 'caTools', 'circlize', 'class', 'classInt', 'cli', 'clue',
##   'cluster', 'codetools', 'colorspace', 'commonmark', 'conquer', 'corrplot',
##   'cowplot', 'cpp11', 'crayon', 'credentials', 'crosstalk', 'curl',
##   'data.table', 'DBI', 'dbplyr', 'deldir', 'desc', 'devtools', 'digest',
##   'dotCall64', 'downlit', 'dplyr', 'dqrng', 'DT', 'dtplyr', 'e1071',
##   'evaluate', 'ExperimentHub', 'fansi', 'farver', 'fastmap', 'fields',
##   'fitdistrplus', 'FNN', 'fontawesome', 'forcats', 'foreign', 'formatR', 'fs',
##   'future', 'future.apply', 'gargle', 'GenomeInfoDb', 'GenomicFeatures',
##   'gert', 'ggplot2', 'ggpubr', 'ggrepel', 'ggridges', 'ggsci', 'ggsignif',
##   'gh', 'globals', 'glue', 'googledrive', 'googlesheets4', 'gower', 'gplots',
##   'gtable', 'gtools', 'hardhat', 'harmony', 'haven', 'hexbin', 'highr', 'hms',
##   'htmltools', 'htmlwidgets', 'httpuv', 'httr', 'hunspell', 'igraph', 'ipred',
##   'irlba', 'isoband', 'jsonlite', 'KernSmooth', 'knitr', 'labeling', 'later',
##   'lattice', 'lava', 'leiden', 'leidenbase', 'lifecycle', 'listenv', 'lme4',
##   'lubridate', 'maps', 'MatrixModels', 'matrixStats', 'mgcv', 'minqa',
##   'modeldata', 'modelr', 'munsell', 'nlme', 'nloptr', 'nnet', 'openssl',
##   'openxlsx', 'parallelly', 'patchwork', 'pbapply', 'pillar', 'pkgbuild',
##   'pkgdown', 'pkgload', 'plotly', 'plyr', 'png', 'polyclip', 'poweRlaw',
##   'pracma', 'prettyunits', 'pROC', 'processx', 'prodlim', 'profvis',
##   'progress', 'progressr', 'promises', 'pryr', 'ps', 'pscl', 'purrr',
##   'quantreg', 'R.oo', 'R.utils', 'ragg', 'RANN', 'raster', 'Rcpp', 'RcppAnnoy',
##   'RcppArmadillo', 'RcppEigen', 'RcppHNSW', 'RcppParallel', 'RcppTOML',
##   'RCurl', 'readr', 'readxl', 'recipes', 'rematch', 'remotes', 'reprex',
##   'reticulate', 'rhdf5filters', 'RhpcBLASctl', 'rio', 'rjson', 'rlang',
##   'rmarkdown', 'roxygen2', 'rpart', 'rprojroot', 'rsample', 'RSpectra',
##   'RSQLite', 'rstatix', 'rstudioapi', 'Rtsne', 'rvest', 's2', 'sass', 'scales',
##   'scattermore', 'sctransform', 'Seurat', 'sf', 'shape', 'shiny', 'slam',
##   'slider', 'sourcetools', 'sp', 'spam', 'SparseM', 'spatstat',
##   'spatstat.data', 'spatstat.geom', 'spatstat.linnet', 'spatstat.random',
##   'spatstat.sparse', 'spatstat.utils', 'spData', 'spdep', 'speedglm',
##   'spelling', 'statmod', 'stringi', 'stringr', 'survival', 'sys',
##   'systemfonts', 'terra', 'testthat', 'TFMPvalue', 'tibble', 'tidyr',
##   'tidyselect', 'tidyverse', 'timeDate', 'tinytex', 'tzdb', 'units', 'usethis',
##   'utf8', 'uuid', 'uwot', 'vctrs', 'viridis', 'viridisLite', 'vroom', 'waldo',
##   'warp', 'whisker', 'withr', 'wk', 'xfun', 'XML', 'xml2', 'xopen', 'yaml',
##   'zip', 'zoo'
library(ArchR)
library(JASPAR2020)
library(motifmatchr)


params <- list()
params[["species"]] <- 9606  # NCBI taxonomy ID for Homo sapiens

# Retrieve the motif set
motifs <- getMatrixSet(JASPAR2020, opts = params)
## Error in getMatrixSet(JASPAR2020, opts = params): could not find function "getMatrixSet"
# Add motifs to ArchR project
proj <- addMotifAnnotations(
  ArchRProj = proj,
  motifSet = "JASPAR2020",
  name = "Motif",
  pathToMacs = macs2_path 
)
## ArchR logging to : ArchRLogs/ArchR-addMotifAnnotations-348ddd6afb0c53-Date-2024-12-16_Time-01-03-10.log
## If there is an issue, please report to github with logFile!
## 2024-12-16 01:03:10 : Gettting Motif Set, Species : Homo sapiens, 0.002 mins elapsed.
## Error in addMotifAnnotations(ArchRProj = proj, motifSet = "JASPAR2020", : object 'macs2_path' not found
# Add motif matrix to ArchR project
proj <- addMotifMatrix(
  ArchRProj = proj,
  genome = "hg38",  # Replace with your genome version
  useMatrix = "TileMatrix",
  scaleTo = 10000
)
## Error in addMotifMatrix(ArchRProj = proj, genome = "hg38", useMatrix = "TileMatrix", : could not find function "addMotifMatrix"
# Add motif activity scores
proj <- addMotifEnrichment(
  ArchRProj = proj,
  motifSet = "JASPAR2020",
  useMatrix = "GeneScoreMatrix",
  bias = c("TSSEnrichment", "log10(nFrags)")
)
## Error in addMotifEnrichment(ArchRProj = proj, motifSet = "JASPAR2020", : could not find function "addMotifEnrichment"
# Compute TF motif activity
proj <- addTranscriptionFactor(
  ArchRProj = proj,
  name = "TF_Activity",
  motifSet = "JASPAR2020",
  useMatrix = "GeneScoreMatrix"
)
## Error in addTranscriptionFactor(ArchRProj = proj, name = "TF_Activity", : could not find function "addTranscriptionFactor"

6.2 Plot UMAP Embeddings for Marker TFs

# Identify marker TF motifs using the TF activity matrix
markers_tf <- getMarkerFeatures(
  ArchRProj = proj,
  groupBy = "Clusters",  # Replace with your actual cluster column
  useMatrix = "MotifMatrix",
  bias = c("TSSEnrichment", "log10(nFrags)"),
  testMethod = "wilcoxon"
)
## ArchR logging to : ArchRLogs/ArchR-getMarkerFeatures-348ddd6c9185f0-Date-2024-12-16_Time-01-03-10.log
## If there is an issue, please report to github with logFile!
## Error in h5read(ArrowFile, paste0(subGroup, "/Info/FeatureDF")): Object 'MotifMatrix/Info/FeatureDF' does not exist in this HDF5 file.
# Retrieve marker TF motifs with specified cut-offs
markerList_tf <- getMarkers(
  markers_tf, 
  cutOff = "FDR < 0.05 & Log2FC > 1"
)
## Error in is(input, "SummarizedExperiment"): object 'markers_tf' not found
# Extract the top variable TF motifs across all clusters
# For simplicity, we'll aggregate and find overall top TFs
allMarkers <- unlist(markerList_tf, use.names = FALSE)
## Error in unlist(markerList_tf, use.names = FALSE): object 'markerList_tf' not found
topTFs <- unique(head(allMarkers, 10))  # Top 10 TFs across clusters
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'unique': error in evaluating the argument 'x' in selecting a method for function 'head': object 'allMarkers' not found
# Alternatively, identify TFs with highest variability
# Here, we can use variance across cells
tfMatrix <- getMatrixFromProject(proj, useMatrix = "MotifMatrix")
## ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-348ddd3d13b85c-Date-2024-12-16_Time-01-03-10.log
## If there is an issue, please report to github with logFile!
## Error in getMatrixFromProject(proj, useMatrix = "MotifMatrix"): useMatrix is not in Available Matrices see getAvailableMatrices
tfVariability <- apply(tfMatrix, 1, var)
## Error in apply(tfMatrix, 1, var): object 'tfMatrix' not found
topVariableTFs <- names(sort(tfVariability, decreasing = TRUE))[1:10]
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'sort': object 'tfVariability' not found
# Select top 2 marker TF motifs
top2TFs <- head(topVariableTFs, 2)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'head': object 'topVariableTFs' not found
print(top2TFs)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'print': object 'top2TFs' not found

Plot UMAPs for the Top 2 Marker TF Motifs

# Plot UMAP for the first top marker TF without MAGIC smoothing
plotEmbedding(
  ArchRProj = proj,
  colorBy = "Motif",
  name = top2TFs[1],
  embedding = "UMAP",
  imputeWeights = NULL  # No smoothing
) + ggtitle(paste("UMAP -", top2TFs[1], "Motif Activity"))
## Error: object 'top2TFs' not found
# Plot UMAP for the second top marker TF without MAGIC smoothing
plotEmbedding(
  ArchRProj = proj,
  colorBy = "Motif",
  name = top2TFs[2],
  embedding = "UMAP",
  imputeWeights = NULL  # No smoothing
) + ggtitle(paste("UMAP -", top2TFs[2], "Motif Activity"))
## Error: object 'top2TFs' not found
# Apply MAGIC Smoothing
proj <- addImputeWeights(
  ArchRProj = proj,
  useMatrix = "MotifMatrix",
  reducedDims = "UMAP",
  k = 30,  # Number of nearest neighbors
  knnMethod = "UMAP",
  smooth = 2
)
## Error in addImputeWeights(ArchRProj = proj, useMatrix = "MotifMatrix", : unused arguments (useMatrix = "MotifMatrix", knnMethod = "UMAP", smooth = 2)
# Plot UMAP for the first top marker TF with MAGIC smoothing
plotEmbedding(
  ArchRProj = proj,
  colorBy = "Motif",
  name = top2TFs[1],
  embedding = "UMAP",
  imputeWeights = getImputeWeights(proj)
) + ggtitle(paste("UMAP -", top2TFs[1], "Motif Activity (MAGIC)"))
## Error: object 'top2TFs' not found
# Plot UMAP for the second top marker TF with MAGIC smoothing
plotEmbedding(
  ArchRProj = proj,
  colorBy = "Motif",
  name = top2TFs[2],
  embedding = "UMAP",
  imputeWeights = getImputeWeights(proj)
) + ggtitle(paste("UMAP -", top2TFs[2], "Motif Activity (MAGIC)"))
## Error: object 'top2TFs' not found

6.3 Motif Activity

# Select top 5 marker TFs from the first cluster
topMarkerTFs <- head(markerList_tf[[1]], 5)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'head': object 'markerList_tf' not found
# Alternatively, use the top 5 most variable TFs identified earlier
topMarkerTFs <- head(topVariableTFs, 5)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'head': object 'topVariableTFs' not found
print(topMarkerTFs)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'print': object 'topMarkerTFs' not found

7. Integration with Gene Expression (7P)

7.1 Data Integration

7.2 Correlation Coefficients

7.3 Cluster Labels from Gene Expression

8. Peak-Gene Linkage (5P)

9. Differential Accessibility (5P)

9.1 Differential Peak Accessibility

9.2 TF Motif Enrichment

10. TF Footprinting (6P)

10.1 Obtaining Footprints on Feature Set

10.2 Normalization for Tn5 Bias

11. (Bonus) Co-accessibility (+5P)

11.1 Co-accessibility of Peaks

11.2 Identify Potential Enhancers for Marker Genes

References