Introduction

The purpose of this script is to perform QC and filter single-cell RNA-seq data with Seurat. Adapted from the Seurat documentation https://satijalab.org/seurat/ and the Harvard Chan Bioinformatics Core guide https://github.com/hbctraining/scRNA-seq/tree/master

Loading data and packages

Install necessary software packages (first time only):

if (!require("BiocManager", quietly = TRUE))
        install.packages("BiocManager")
BiocManager::install(version = "3.19", force=TRUE)

options(BioC_mirror = "http://bioconductor.org")

if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}

if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}

install.packages("Seurat")
install.packages("tidyverse")
install.packages("clustree")
install.packages("stringr")
remotes::install_github("chris-mcginnis-ucsf/DoubletFinder")
install.packages('patchwork')
install.packages("ggrepel")
install.packages("pheatmap")
install.packages("RColorBrewer")
BiocManager::install("SingleR")
install.packages("msigdbr")
BiocManager::install("clusterProfiler")

Load necessary software packages (every time):

library(Seurat)
library(tidyverse)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(data.table)
library(knitr)
library(DoubletFinder)

Load single-cell RNA-seq count data

Input: A barcodes.tsv.gz file of all cellular barcodes present for that sample, a features.tsv.gz file containing identifiers of the quantified genes, and a matrix.mtx.gz file containing a matix of count values. These files should all be in one directory. A Seurat object should be initiated with raw (non-normalized) data.

# This loop assumes the following data structure for accessing Cell Ranger output files:
# current working directory/
# ├── CellRangerOuts
#     ├── Sample1
#         ├──filtered_feature_bc_matrix
#           ├──barcodes.tsv.gz
#           ├──features.tsv.gz
#           ├──matrix.mtx.gz
#     ├── Sample2
#         ├──filtered_feature_bc_matrix
#           ├──barcodes.tsv.gz
#           ├──features.tsv.gz
#           ├──matrix.mtx.gz



# loop through all sample IDs, pull the barcodes.tsv.gz, features.tsv.gz, and matrix.mtx.gz files from their respective directories, and create a Seurat object for each with the sample ID as the project variable

setwd("C:/Users/edlarsen/Documents/240828_scRNAseq")

for (file in c("L165597", "LN154803", "LN157849", "T154802", "T165635", "Th157850")){
  seurat_data <- Read10X(data.dir = paste("CellRangerOuts", file, "filtered_feature_bc_matrix/", sep = "/"))
  seurat_obj <- CreateSeuratObject(counts = seurat_data,
                                   min.cells = 3, # excludes features expressed in less than 3 cells
                                   min.features = 100, # removes dead cells and empty droplets where few features are detected
                                   project = file)
  assign(file, seurat_obj)
}
# Explore the metadata of the resulting Seurat objects. 'orig.ident' contains the sample identity, 'nCount_RNA' is the number of UMIs per cell, and 'nFeature_RNA' is the number of genes detected per cell.
head(L165597@meta.data)
##                    orig.ident nCount_RNA nFeature_RNA
## AAACCCAAGCATTTGC-1    L165597      25070         4830
## AAACCCAAGCTGACCC-1    L165597      10953         3200
## AAACCCACAGCGACCT-1    L165597       4034         1694
## AAACCCAGTCAAGCGA-1    L165597       8610         2362
## AAACCCAGTCTCCCTA-1    L165597       6616         2047
## AAACCCAGTGAGCTCC-1    L165597       5729         2063
head(LN154803@meta.data)
##                    orig.ident nCount_RNA nFeature_RNA
## AAACCCAAGAATCTAG-1   LN154803       2627         1515
## AAACCCAAGAGGGTCT-1   LN154803       2793         1192
## AAACCCACAAGTGCAG-1   LN154803       4157         1639
## AAACCCACACAGTCCG-1   LN154803       2220         1165
## AAACCCACAGCTGTGC-1   LN154803       7223         2380
## AAACCCACAGGCCCTA-1   LN154803       9561         3005
head(LN157849@meta.data)
##                    orig.ident nCount_RNA nFeature_RNA
## AAACCCAAGTAGCTCT-1   LN157849       3285         1405
## AAACCCACAACTCGTA-1   LN157849       3110         1421
## AAACCCACAATAAGGT-1   LN157849       4151         1587
## AAACCCACACTCAGAT-1   LN157849       2526         1301
## AAACCCACATGGATCT-1   LN157849       4054         1429
## AAACCCACATGGGTTT-1   LN157849       3569         1495
head(T154802@meta.data)
##                    orig.ident nCount_RNA nFeature_RNA
## AAACCCAAGATACCAA-1    T154802       3932         1741
## AAACCCAAGCGTGAGT-1    T154802       3970         1586
## AAACCCACAACGGGTA-1    T154802       5251         2272
## AAACCCACACAACGCC-1    T154802       5280         2105
## AAACCCACAGACCTGC-1    T154802       5158         1977
## AAACCCAGTATGAAGT-1    T154802       4430         1888
head(T165635@meta.data)
##                    orig.ident nCount_RNA nFeature_RNA
## AAACCCAAGGCAGGTT-1    T165635       4464         1579
## AAACCCAAGTTGCGCC-1    T165635       1117          227
## AAACCCACAACCAATC-1    T165635       6259         2202
## AAACCCACATGGTGGA-1    T165635       6369         2167
## AAACCCAGTGCAACAG-1    T165635       5252         2346
## AAACGAAAGAGGGTAA-1    T165635       8844         2588
head(Th157850@meta.data)
##                    orig.ident nCount_RNA nFeature_RNA
## AAACCCAAGACCGCCT-1   Th157850       2733         1347
## AAACCCAAGGAACTAT-1   Th157850       1954         1102
## AAACCCACAAATACAG-1   Th157850       1801         1050
## AAACCCACAGCAGGAT-1   Th157850       2085          939
## AAACCCACAGCAGTTT-1   Th157850        547          409
## AAACCCAGTACATTGC-1   Th157850      20500         4613

Merging samples

Merge all samples into one Seurat object for QC

Merging makes it easier to run the QC steps and easily compare the data quality for all samples.

# create merged Seurat object. 'add.cell.ids' prepends given identifier to the beginning of each cell name to easily tell which original object a particular cell came from
merged_seurat <- merge(L165597, y = c(LN154803, LN157849, T154802, T165635, Th157850),
                       add.cell.ids = c("L165597", "LN154803", "LN157849", "T154802", "T165635", "Th157850"))

# inspect resulting merged object
head(colnames(merged_seurat))
## [1] "L165597_AAACCCAAGCATTTGC-1" "L165597_AAACCCAAGCTGACCC-1"
## [3] "L165597_AAACCCACAGCGACCT-1" "L165597_AAACCCAGTCAAGCGA-1"
## [5] "L165597_AAACCCAGTCTCCCTA-1" "L165597_AAACCCAGTGAGCTCC-1"
tail(colnames(merged_seurat))
## [1] "Th157850_TTTGTTGCATTGTAGC-1" "Th157850_TTTGTTGGTACCCGCA-1"
## [3] "Th157850_TTTGTTGGTACTGAGG-1" "Th157850_TTTGTTGGTATCGTGT-1"
## [5] "Th157850_TTTGTTGTCCAATCTT-1" "Th157850_TTTGTTGTCTTGGTCC-1"
table(merged_seurat$orig.ident)
## 
##  L165597 LN154803 LN157849  T154802  T165635 Th157850 
##     5844    11277    13126     9130     6480     7836

Merge samples by location

Merge individual lymph node samples and individual thymus samples into one Seurat object for lymph node and one Seurat object for thymus, respectively, for downstream clustering and other analyses. These are the objects that will be filtered following QC.

## lymph node
merged_ln <- merge(L165597, y = c(LN154803, LN157849),
                   add.cell.ids = c("L165597", "LN154803", "LN157849"))
merged_ln[["RNA"]] <- JoinLayers(merged_ln[["RNA"]]) # rejoin layers after merging

## thymus
merged_thym <- merge(T154802, y = c(T165635, Th157850),
                     add.cell.ids = c("T154802", "T165635", "Th157850"))
merged_thym[["RNA"]] <- JoinLayers(merged_thym[["RNA"]]) # rejoin layers after merging

Perform QC of raw count data

Add additional parameters to metadata

# Add column of genes per UMI for each cell to object metadata
merged_seurat[["log10GenesPerUMI"]] <- log10(merged_seurat$nFeature_RNA / log10(merged_seurat$nCount_RNA))
merged_ln[["log10GenesPerUMI"]] <- log10(merged_ln$nFeature_RNA) / log10(merged_ln$nCount_RNA)
merged_thym[["log10GenesPerUMI"]] <- log10(merged_thym$nFeature_RNA) / log10(merged_thym$nCount_RNA)

# Add columns of mitochondrial percentage and ratio to object metadata
merged_seurat[["percent.mt"]] <- PercentageFeatureSet(merged_seurat, pattern = "^MT-")
merged_seurat[["mitoRatio"]] <- merged_seurat@meta.data$percent.mt / 100

merged_ln[["percent.mt"]] <- PercentageFeatureSet(merged_ln, pattern = "^MT-")
merged_ln[["mitoRatio"]] <- merged_ln@meta.data$percent.mt / 100

merged_thym[["percent.mt"]] <- PercentageFeatureSet(merged_thym, pattern = "^MT-")
merged_thym[["mitoRatio"]] <- merged_thym@meta.data$percent.mt / 100

# Create metadata dataframe to add some information for QC analysis without affecting our seurat object
metadata <- merged_seurat@meta.data

# Add column with cell IDs
metadata$cells <- rownames(metadata)

# Change column names to be more intuitive
metadata <- metadata %>%
  dplyr::rename(sampleID = orig.ident,
                nUMI = nCount_RNA,
                nGene = nFeature_RNA)

head(metadata)
##                            sampleID  nUMI nGene log10GenesPerUMI percent.mt
## L165597_AAACCCAAGCATTTGC-1  L165597 25070  4830         3.040578   5.416833
## L165597_AAACCCAAGCTGACCC-1  L165597 10953  3200         2.898819   6.272254
## L165597_AAACCCACAGCGACCT-1  L165597  4034  1694         2.671919   5.726326
## L165597_AAACCCAGTCAAGCGA-1  L165597  8610  2362         2.778335   1.765389
## L165597_AAACCCAGTCTCCCTA-1  L165597  6616  2047         2.728987   4.337969
## L165597_AAACCCAGTGAGCTCC-1  L165597  5729  2063         2.739533   3.857567
##                             mitoRatio                      cells
## L165597_AAACCCAAGCATTTGC-1 0.05416833 L165597_AAACCCAAGCATTTGC-1
## L165597_AAACCCAAGCTGACCC-1 0.06272254 L165597_AAACCCAAGCTGACCC-1
## L165597_AAACCCACAGCGACCT-1 0.05726326 L165597_AAACCCACAGCGACCT-1
## L165597_AAACCCAGTCAAGCGA-1 0.01765389 L165597_AAACCCAGTCAAGCGA-1
## L165597_AAACCCAGTCTCCCTA-1 0.04337969 L165597_AAACCCAGTCTCCCTA-1
## L165597_AAACCCAGTGAGCTCC-1 0.03857567 L165597_AAACCCAGTGAGCTCC-1
setwd("C:/Users/edlarsen/Documents/240828_scRNAseq")
write.csv(metadata, file = "QC/seurat_QC_metadata.csv")

Cell counts per sample

metadata %>% 
    ggplot(aes(x=sampleID, fill=sampleID)) + 
    geom_bar() +
    scale_y_continuous(breaks = seq(0, 20000, by = 1000)) +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
    theme(plot.title = element_text(hjust=0.5, face="bold")) +
    ggtitle("Number of Cell Counts Per Sample")

Number of UMIs (transcripts) per cell

UMI counts per cell should generally be at least over 500, with the majority of cells having 1000 UMIs or more. On the other hand, cell doublets/multiplets may exhibit an aberrantly high UMI count.

# Violin plot
VlnPlot(merged_seurat, features = "nCount_RNA", alpha = 0.1) + scale_y_continuous(breaks = seq(0, 90000, by = 20000)) + geom_hline(yintercept = 10000)

# Histogram
metadata %>% 
    ggplot(aes(color=sampleID, x=nUMI, fill= sampleID)) + 
    geom_density(alpha = 0.2) + 
    scale_x_log10() + 
    theme_classic() +
    ylab("log10 cell density") +
    geom_vline(xintercept = 1000)

Number of genes detected per cell

Low-quality cells or empty droplets will often have very few genes. However, cell doublets/multiplets may exhibit an aberrantly high gene counts.

# Violin plot
VlnPlot(merged_seurat, features = "nFeature_RNA", alpha = 0.1) + scale_y_continuous(breaks = seq(0, 8000, by = 500))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

# Histogram
metadata %>% 
    ggplot(aes(color=sampleID, x=nGene, fill= sampleID)) + 
    geom_density(alpha = 0.2) + 
    theme_classic() +
    scale_x_log10() + 
    geom_vline(xintercept = 300) +
    labs(title = "Number of Cells vs Number of Genes", y = "Cell density")

# Box plot
metadata %>% 
    ggplot(aes(x=sampleID, y=log10(nGene), fill=sampleID)) + 
    geom_boxplot() + 
    theme_classic() +
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
    theme(plot.title = element_text(hjust=0.5, face="bold")) +
    ggtitle("Number of Cells vs Number of Genes")

Number of genes per UMI

Gives an idea of the complexity of the dataset (more genes per UMI = more complex data). Generally, the novelty score is expected to be above 0.80.

# Visualize the overall complexity of the gene expression by visualizing the genes detected per UMI
metadata %>%
    ggplot(aes(x=log10GenesPerUMI, color = sampleID, fill=sampleID)) +
    geom_density(alpha = 0.2) +
    theme_classic() +
    geom_vline(xintercept = 0.8) +
    ylab("Cell density")

Here, the number of genes versus the number of UMIs is plotted, colored by the fraction of mitochondrial reads. Mitochondrial read fractions are only high in particularly low count cells with few detected genes. This could be indicative of damaged/dying cells, and these cells are filtered out by our count and gene number thresholds. Jointly visualizing the count and gene thresholds shows the joint filtering effect.

Cells that are poor quality are likely to have low genes and UMIs per cell, and correspond to data points in the bottom left quadrant of the plot. The cells in the bottom right quadrant have high UMIs but only a few number of genes. These could be dying cells but could also represent a population of low complexity cell types.

# Visualize the correlation between genes detected and number of UMIs and determine whether strong presence of cells with low numbers of genes/UMIs
metadata %>% 
    ggplot(aes(x=nUMI, y=nGene, color=mitoRatio)) + 
    geom_point() + 
      scale_colour_gradient(low = "gray90", high = "black") +
    stat_smooth(method=lm) +
    scale_x_log10() + 
    scale_y_log10() + 
    theme_classic() +
    labs(title = "Number of Genes per UMI") +
    geom_vline(xintercept = 2200) +
    geom_hline(yintercept = 600) +
    facet_wrap(~sampleID)

Mitochondrial counts ratio

Identify whether there is a large amount of mitochondrial contamination from dead or dying cells. Poor quality samples for mitochondrial counts are defined as cells which surpass the 0.2 mitochondrial ratio mark, unless this is expected for the sample.

# Violin plot of mitochondrial percentage
VlnPlot(merged_seurat, features = "percent.mt", alpha=0.1) + scale_y_continuous(breaks = seq(0, 100, by = 5))

# Histogram of mitochondrial ratio
metadata %>% 
    ggplot(aes(color=sampleID, x=mitoRatio, fill=sampleID)) + 
    geom_density(alpha = 0.2) + 
    scale_x_log10() + 
    theme_classic() +
    geom_vline(xintercept = 0.2) +
    labs(title = "Mitochondrial Counts Ratio", y = "Cell density")

Filtering

Considering any of the above QC metrics in isolation can lead to misinterpretation of cellular signals. E.g., cells with a high fraction of mitochondrial counts may be involved in respiratory processes and may be cells to keep. Always consider the joint effects of these metrics when setting thresholds, and set them to be as permissive as possible to avoid filtering out viable cell populations.

Cell level filtering

Based on the results observed in the QC plots, the following filters will be applied: - nUMI (nCount_RNA) > 500 - nGene (nFeature_RNA) > 300 and < 6000 - percent.mt < 20%

### Filter merged lymph node object
filtered_ln <- subset(x = merged_ln,
                          subset = (nCount_RNA >= 500) &
                            (nFeature_RNA >= 300) &
                            (nFeature_RNA <= 6000) &
                            (percent.mt < 20))

### Filter merged thymus object
filtered_thym <- subset(x = merged_thym,
                          subset = (nCount_RNA >= 500) &
                            (nFeature_RNA >= 300) &
                            (nFeature_RNA <= 6000) &
                            (percent.mt < 20))

Gene-level filtering

Genes with zero counts can dramatically reduce the average expression for a cell, so they should be removed from the data.

# Remove genes that have zero expression in all cells
## Merged lymph node object
counts_ln <- GetAssayData(object = filtered_ln, layer = "counts")
nonzero <- counts_ln > 0 # output a logical vector for every gene on whether there are more than zero counts per cell

## Merged thymus object
counts_thym <- GetAssayData(object = filtered_thym, layer = "counts")
nonzero <- counts_thym > 0



# Filter by prevalence - keep only genes which are expressed in 10 or more cells
keep_genes <- Matrix::rowSums(nonzero) >= 10

# Only keeping those genes expressed in more than 10 cells
filtered_counts_ln <- counts_ln[keep_genes, ] # lymph node
filtered_counts_thym <- counts_thym[keep_genes, ] # thymus

# Reassign to filtered Seurat object
filtered_ln <- CreateSeuratObject(filtered_counts_ln, meta.data = filtered_ln@meta.data) # lymph node
filtered_thym <- CreateSeuratObject(filtered_counts_thym, meta.data = filtered_thym@meta.data) # thymus

setwd("C:/Users/edlarsen/Documents/240828_scRNAseq")
write.csv(filtered_ln@meta.data, file="QC/filtered_seurat_metadata_ln.csv")
write.csv(filtered_thym@meta.data, file="QC/filtered_seurat_metadata_thym.csv")

Examine filtered objects

### Lymph node
paste("Unfiltered lymph node object:")
## [1] "Unfiltered lymph node object:"
merged_ln
## An object of class Seurat 
## 16088 features across 30247 samples within 1 assay 
## Active assay: RNA (16088 features, 0 variable features)
##  1 layer present: counts
paste("Filtered lymph node object:")
## [1] "Filtered lymph node object:"
filtered_ln
## An object of class Seurat 
## 14892 features across 29438 samples within 1 assay 
## Active assay: RNA (14892 features, 0 variable features)
##  1 layer present: counts
### Thymus
paste("Unfiltered thymus object:")
## [1] "Unfiltered thymus object:"
merged_thym
## An object of class Seurat 
## 15841 features across 23446 samples within 1 assay 
## Active assay: RNA (15841 features, 0 variable features)
##  1 layer present: counts
paste("Filtered thymus object:")
## [1] "Filtered thymus object:"
filtered_thym
## An object of class Seurat 
## 14661 features across 22654 samples within 1 assay 
## Active assay: RNA (14661 features, 0 variable features)
##  1 layer present: counts

Save filtered cells

saveRDS(filtered_ln, file="seurat_merged_LN_filtered.RData")
saveRDS(filtered_thym, file="seurat_merged_THYM_filtered.RData")

Next step: Identify and remove doublets.

Citations

sessionInfo()
## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Denver
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] DoubletFinder_2.0.4 knitr_1.49          data.table_1.16.4  
##  [4] RColorBrewer_1.1-3  pheatmap_1.0.12     patchwork_1.3.0    
##  [7] lubridate_1.9.4     forcats_1.0.0       stringr_1.5.1      
## [10] dplyr_1.1.4         purrr_1.0.2         readr_2.1.5        
## [13] tidyr_1.3.1         tibble_3.2.1        ggplot2_3.5.1      
## [16] tidyverse_2.0.0     Seurat_5.1.0        SeuratObject_5.0.2 
## [19] sp_2.1-4           
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.17.1      jsonlite_1.8.9         magrittr_2.0.3        
##   [4] ggbeeswarm_0.7.2       spatstat.utils_3.1-1   farver_2.1.2          
##   [7] rmarkdown_2.29         vctrs_0.6.5            ROCR_1.0-11           
##  [10] spatstat.explore_3.3-3 htmltools_0.5.8.1      sass_0.4.9            
##  [13] sctransform_0.4.1      parallelly_1.40.1      KernSmooth_2.23-22    
##  [16] bslib_0.8.0            htmlwidgets_1.6.4      ica_1.0-3             
##  [19] plyr_1.8.9             plotly_4.10.4          zoo_1.8-12            
##  [22] cachem_1.1.0           igraph_2.1.2           mime_0.12             
##  [25] lifecycle_1.0.4        pkgconfig_2.0.3        Matrix_1.7-0          
##  [28] R6_2.5.1               fastmap_1.2.0          fitdistrplus_1.2-2    
##  [31] future_1.34.0          shiny_1.10.0           digest_0.6.35         
##  [34] colorspace_2.1-1       tensor_1.5             RSpectra_0.16-2       
##  [37] irlba_2.3.5.1          labeling_0.4.3         progressr_0.15.1      
##  [40] spatstat.sparse_3.1-0  timechange_0.3.0       mgcv_1.9-1            
##  [43] httr_1.4.7             polyclip_1.10-7        abind_1.4-8           
##  [46] compiler_4.4.0         withr_3.0.2            fastDummies_1.7.4     
##  [49] MASS_7.3-60.2          tools_4.4.0            vipor_0.4.7           
##  [52] lmtest_0.9-40          beeswarm_0.4.0         httpuv_1.6.15         
##  [55] future.apply_1.11.3    goftest_1.2-3          glue_1.7.0            
##  [58] nlme_3.1-164           promises_1.3.2         grid_4.4.0            
##  [61] Rtsne_0.17             cluster_2.1.6          reshape2_1.4.4        
##  [64] generics_0.1.3         gtable_0.3.6           spatstat.data_3.1-4   
##  [67] tzdb_0.4.0             hms_1.1.3              spatstat.geom_3.3-4   
##  [70] RcppAnnoy_0.0.22       ggrepel_0.9.6          RANN_2.6.2            
##  [73] pillar_1.10.1          spam_2.11-0            RcppHNSW_0.6.0        
##  [76] later_1.3.2            splines_4.4.0          lattice_0.22-6        
##  [79] survival_3.5-8         deldir_2.0-4           tidyselect_1.2.1      
##  [82] miniUI_0.1.1.1         pbapply_1.7-2          gridExtra_2.3         
##  [85] scattermore_1.2        xfun_0.49              matrixStats_1.4.1     
##  [88] stringi_1.8.4          lazyeval_0.2.2         yaml_2.3.10           
##  [91] evaluate_1.0.3         codetools_0.2-20       cli_3.6.2             
##  [94] uwot_0.2.2             xtable_1.8-4           reticulate_1.40.0     
##  [97] munsell_0.5.1          jquerylib_0.1.4        Rcpp_1.0.13           
## [100] globals_0.16.3         spatstat.random_3.3-2  png_0.1-8             
## [103] ggrastr_1.0.2          spatstat.univar_3.1-1  parallel_4.4.0        
## [106] dotCall64_1.2          listenv_0.9.1          viridisLite_0.4.2     
## [109] scales_1.3.0           ggridges_0.5.6         leiden_0.4.3.1        
## [112] rlang_1.1.3            cowplot_1.1.3
citation()
## To cite R in publications use:
## 
##   R Core Team (2024). _R: A Language and Environment for Statistical
##   Computing_. R Foundation for Statistical Computing, Vienna, Austria.
##   <https://www.R-project.org/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2024},
##     url = {https://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.