library(Seurat)
library(SeuratWrappers)
library(monocle)
library(monocle3)
library(ggplot2)
options(repr.plot.width=9, repr.plot.height=6)
ggplot(seu@meta.data, aes(nCount_RNA, nFeature_RNA)) +
geom_hex(bins = 100) +
scale_fill_scico(palette = "devon", direction = -1, end = 0.9) +
scale_x_log10(breaks = breaks_log(12)) +
scale_y_log10(breaks = breaks_log(12)) + annotation_logticks() +
labs(x = "Total UMI counts", y = "Number of genes detected") +
theme(panel.grid.minor = element_blank())
#mitochondrial percentages
ggplot(seu@meta.data, aes(nCount_RNA, percent.mt)) +
geom_pointdensity() +
scale_color_scico(palette = "devon", direction = -1, end = 0.9) +
labs(x = "Total UMI counts", y = "Percentage mitochondrial")
# set up list of canonical cell type markers
canonical_markers <- list(
'Astrocyte' = c('GFAP', 'AQP4', 'SLC1A2'),
'Pan-neuronal' = c('SNAP25', 'SYT1'),
'Excitatory Neuron' = c('SLC17A7', 'SATB2'),
'Inhibitory Neuron' = c('GAD1', 'GAD2'),
'Microglia' = c('CSF1R', 'CD74', 'P2RY12'),
'Oligodendrocyte' = c('MOBP', 'MBP', 'MOG'),
'Olig. Progenitor' = c('PDGFRA', 'CSPG4')
)
# plot heatmap:
library(viridis)
png('figures/basic_canonical_marker_heatmap.png', width=10, height=10, units='in', res=200)
DoHeatmap(seurat_obj, group.by ="seurat_clusters", features=as.character(unlist(canonical_markers))) # + scale_fill_gradientn(colors=viridis(256))
dev.off()
# create feature plots, cutoff expression values for the 98th and 99th percentile
plot_list <- FeaturePlot(
seurat_obj,
features=unlist(canonical_markers),
combine=FALSE, cols=viridis(256),
max.cutoff='q98'
)
# apply theme to each feature plot
for(i in 1:length(plot_list)){
plot_list[[i]] <- plot_list[[i]] + umap_theme + NoLegend()
}
png('figures/basic_canonical_marker_featurePlot.png', width=10, height=10, units='in', res=200)
CombinePlots(plot_list)
dev.off()
for(celltype in names(canonical_markers)){
print(celltype)
cur_features <- canonical_markers[[celltype]]
# plot distributions for marker genes:
p <- VlnPlot(
seurat_obj,
group.by='seurat_clusters',
features=cur_features,
pt.size = 0, ncol=1
)
png(paste0('figures/basic_canonical_marker_',celltype,'_vlnPlot.png'), width=10, height=3*length(cur_features), units='in', res=200)
print(p)
dev.off()
}
# add barcode and UMAP to metadata
seurat_obj$barcode <- colnames(seurat_obj)
seurat_obj$UMAP_1 <- seurat_obj@reductions$umap@cell.embeddings[,1]
seurat_obj$UMAP_2 <- seurat_obj@reductions$umap@cell.embeddings[,2]
# save seurat object
saveRDS(seurat_obj, file='data/processed_seurat_object.rds')
# load seurat object
seurat_obj <- readRDS(ile='data/processed_seurat_object.rds')
library(Seurat)
library(tidyverse)
library(Matrix)
library(cowplot)
theme_set(theme_cowplot())
data_dir <- '/dfs3/swaruplab/smorabit/analysis/cross-disorder/data/trem2_nmed/'
files <- dir(data_dir)
files <- files[grepl('matrix', files)]
file_stems <- str_replace_all(files, '_matrix.mtx', '')
# construct a list of seurat objects for each sample by iteratively loading each file
seurat_list <- lapply(file_stems, function(file){
print(file)
X <- readMM(paste0(data_dir,file,'_matrix.mtx'))
genes <- read.csv(file=paste0(data_dir,file,'_features.tsv'), sep='\t', header=FALSE)
barcodes <- read.csv(file=paste0(data_dir,file,'_barcodes.tsv'), sep='\t', header=FALSE)
colnames(X) <- barcodes$V1
rownames(X) <- genes$V2
cur_seurat <- CreateSeuratObject(
counts = X,
project = "tutorial"
)
cur_seurat@meta.data$SampleID <- file
cur_seurat
})
# merge into one big seurat object
seurat_obj <- merge(x=seurat_list[[1]], y=seurat_list[2:length(seurat_list)])
# add metadata
meta <- read.csv(paste0(data_dir, 'meta.csv'))
rownames(meta) <- meta$Sample.ID.in.snRNA.seq
meta.data <- meta[seurat_obj$SampleID,]
for(m in names(meta.data)){
seurat_obj@meta.data[[m]] <- meta.data[[m]]
}
# remove individual seurat objects to save memory
rm(seurat_list); gc();
# use table function to get the number of cells in each Sample as a dataframe
df <- as.data.frame(rev(table(seurat_obj$SampleID)))
colnames(df) <- c('SampleID', 'n_cells')
# bar plot of the number of cells in each sample
p <- ggplot(df, aes(y=n_cells, x=reorder(SampleID, -n_cells), fill=SampleID)) +
geom_bar(stat='identity') +
scale_y_continuous(expand = c(0,0)) +
NoLegend() + RotatedAxis() +
ylab(expression(italic(N)[cells])) + xlab('Sample ID') +
ggtitle(paste('Total cells:', sum(df$n_cells))) +
theme(
panel.grid.minor=element_blank(),
panel.grid.major.y=element_line(colour="lightgray", size=0.5),
)
png('figures/basic_cells_per_sample.png', width=9, height=4, res=200, units='in')
print(p)
dev.off()
# plot the number of DEGs per cluster:
df <- as.data.frame(rev(table(cluster_markers$CellType_cluster)))
colnames(df) <- c('cluster', 'n_DEGs')
# bar plot of the number of cells in each sample
p <- ggplot(df, aes(y=n_DEGs, x=reorder(cluster, -n_DEGs), fill=cluster)) +
geom_bar(stat='identity') +
scale_y_continuous(expand = c(0,0)) +
NoLegend() + RotatedAxis() +
ylab(expression(italic(N)[DEGs])) + xlab('') +
theme(
panel.grid.minor=element_blank(),
panel.grid.major.y=element_line(colour="lightgray", size=0.5),
)
png('figures/basic_DEGs_barplot.png', width=9, height=4, res=300, units='in')
print(p)
dev.off()
# plot the top 3 DEGs per cluster as a heatmap:
top_DEGs <- cluster_markers %>%
group_by(CellType_cluster) %>%
top_n(3, wt=avg_logFC) %>%
.$gene
png('figures/basic_DEGs_heatmap.png', width=10, height=10, res=300, units='in')
pdf('figures/basic_DEGs_heatmap.pdf', width=15, height=12, useDingbats=FALSE)
DoHeatmap(seurat_obj, features=top_DEGs, group.by='seurat_clusters', label=FALSE) + scale_fill_gradientn(colors=viridis(256)) + NoLegend()
dev.off()
cds <- cluster_cells(cds)
plot_cells(cds, color_cells_by = "partition", group_cells_by = "partition",
group_label_size = 4)
#monocle plot cells
plot_cells(cds, reduction_method = "PCA",
color_cells_by = "cell_type", group_label_size = 3.5,
label_groups_by_cluster = FALSE) +
scale_color_d3(palette = "category20b")
# Seed for random initiation of UMAP
set.seed(4837)
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", init = "random")
plot_cells(cds, color_cells_by = "cell_type", group_label_size = 3.5,
label_groups_by_cluster = FALSE) +
scale_color_d3(palette = "category20b")
plot_cells(cds, color_cells_by = "cluster", group_cells_by = "cluster",
group_label_size = 4)
#Monocle 3’s trajectory inference is inspired by PAGA.
cds <- learn_graph(cds, verbose = FALSE,
learn_graph_control = list(minimal_branch_len = 7,
geodesic_distance_ratio = 0.5))
plot_cells(cds, color_cells_by = "cell_type", label_groups_by_cluster = FALSE,
group_label_size = 3.5, graph_label_size = 2) +
scale_color_d3(palette = "category20b")
---
title: "R Notebook"
output: html_notebook
---
---
title: "basics for monocle and seurat based plots"
output:
  pdf_document: default
  html_document:
    df_print: paged
---
```{r}
library(Seurat)
library(SeuratWrappers)
library(monocle)
library(monocle3)
library(ggplot2)

options(repr.plot.width=9, repr.plot.height=6)
ggplot(seu@meta.data, aes(nCount_RNA, nFeature_RNA)) +
  geom_hex(bins = 100) +
  scale_fill_scico(palette = "devon", direction = -1, end = 0.9) +
  scale_x_log10(breaks = breaks_log(12)) + 
  scale_y_log10(breaks = breaks_log(12)) + annotation_logticks() +
  labs(x = "Total UMI counts", y = "Number of genes detected") +
  theme(panel.grid.minor = element_blank())
```
```{r}
#mitochondrial percentages
ggplot(seu@meta.data, aes(nCount_RNA, percent.mt)) +
  geom_pointdensity() +
  scale_color_scico(palette = "devon", direction = -1, end = 0.9) +
  labs(x = "Total UMI counts", y = "Percentage mitochondrial")
```

```{r}
# set up list of canonical cell type markers
canonical_markers <- list(
  'Astrocyte' = c('GFAP', 'AQP4', 'SLC1A2'),
  'Pan-neuronal' = c('SNAP25', 'SYT1'),
  'Excitatory Neuron' = c('SLC17A7', 'SATB2'),
  'Inhibitory Neuron' = c('GAD1', 'GAD2'),
  'Microglia' = c('CSF1R', 'CD74', 'P2RY12'),
  'Oligodendrocyte' = c('MOBP', 'MBP', 'MOG'),
  'Olig. Progenitor' = c('PDGFRA', 'CSPG4')
)

# plot heatmap:
library(viridis)
png('figures/basic_canonical_marker_heatmap.png', width=10, height=10, units='in', res=200)
DoHeatmap(seurat_obj, group.by ="seurat_clusters", features=as.character(unlist(canonical_markers))) # + scale_fill_gradientn(colors=viridis(256))
dev.off()

# create feature plots, cutoff expression values for the 98th and 99th percentile
plot_list <- FeaturePlot(
  seurat_obj,
  features=unlist(canonical_markers),
  combine=FALSE, cols=viridis(256),
  max.cutoff='q98'
)

# apply theme to each feature plot
for(i in 1:length(plot_list)){
  plot_list[[i]] <- plot_list[[i]] + umap_theme + NoLegend()
}

png('figures/basic_canonical_marker_featurePlot.png', width=10, height=10, units='in', res=200)
CombinePlots(plot_list)
dev.off()
for(celltype in names(canonical_markers)){

  print(celltype)
  cur_features <- canonical_markers[[celltype]]

  # plot distributions for marker genes:
  p <- VlnPlot(
    seurat_obj,
    group.by='seurat_clusters',
    features=cur_features,
    pt.size = 0, ncol=1
  )
  png(paste0('figures/basic_canonical_marker_',celltype,'_vlnPlot.png'), width=10, height=3*length(cur_features), units='in', res=200)
  print(p)
  dev.off()

}
```


```{r}
# add barcode and UMAP to metadata
seurat_obj$barcode <- colnames(seurat_obj)
seurat_obj$UMAP_1 <- seurat_obj@reductions$umap@cell.embeddings[,1]
seurat_obj$UMAP_2 <- seurat_obj@reductions$umap@cell.embeddings[,2]

# save seurat object
saveRDS(seurat_obj, file='data/processed_seurat_object.rds')

# load seurat object
seurat_obj <- readRDS(ile='data/processed_seurat_object.rds')
```

```{r}
library(Seurat)
library(tidyverse)
library(Matrix)
library(cowplot)
theme_set(theme_cowplot())

data_dir <- '/dfs3/swaruplab/smorabit/analysis/cross-disorder/data/trem2_nmed/'
files <- dir(data_dir)
files <- files[grepl('matrix', files)]
file_stems <- str_replace_all(files, '_matrix.mtx', '')

# construct a list of seurat objects for each sample by iteratively loading each file
seurat_list <- lapply(file_stems, function(file){
  print(file)
  X <- readMM(paste0(data_dir,file,'_matrix.mtx'))

  genes <- read.csv(file=paste0(data_dir,file,'_features.tsv'), sep='\t', header=FALSE)
  barcodes <- read.csv(file=paste0(data_dir,file,'_barcodes.tsv'), sep='\t', header=FALSE)

  colnames(X) <- barcodes$V1
  rownames(X) <- genes$V2

  cur_seurat <- CreateSeuratObject(
    counts = X,
    project = "tutorial"
  )
  cur_seurat@meta.data$SampleID <- file
  cur_seurat
})

# merge into one big seurat object
seurat_obj <- merge(x=seurat_list[[1]], y=seurat_list[2:length(seurat_list)])

# add metadata
meta <- read.csv(paste0(data_dir, 'meta.csv'))
rownames(meta) <- meta$Sample.ID.in.snRNA.seq

meta.data <- meta[seurat_obj$SampleID,]
for(m in names(meta.data)){
  seurat_obj@meta.data[[m]] <- meta.data[[m]]
}

# remove individual seurat objects to save memory
rm(seurat_list); gc();
```

```{r}
# use table function to get the number of cells in each Sample as a dataframe
df <- as.data.frame(rev(table(seurat_obj$SampleID)))
colnames(df) <- c('SampleID', 'n_cells')

# bar plot of the number of cells in each sample
p <- ggplot(df, aes(y=n_cells, x=reorder(SampleID, -n_cells), fill=SampleID)) +
  geom_bar(stat='identity') +
  scale_y_continuous(expand = c(0,0)) +
  NoLegend() + RotatedAxis() +
  ylab(expression(italic(N)[cells])) + xlab('Sample ID') +
  ggtitle(paste('Total cells:', sum(df$n_cells))) +
  theme(
    panel.grid.minor=element_blank(),
    panel.grid.major.y=element_line(colour="lightgray", size=0.5),
  )

png('figures/basic_cells_per_sample.png', width=9, height=4, res=200, units='in')
print(p)
dev.off()
```

```{r}
# plot the number of DEGs per cluster:
df <- as.data.frame(rev(table(cluster_markers$CellType_cluster)))
colnames(df) <- c('cluster', 'n_DEGs')

# bar plot of the number of cells in each sample
p <- ggplot(df, aes(y=n_DEGs, x=reorder(cluster, -n_DEGs), fill=cluster)) +
  geom_bar(stat='identity') +
  scale_y_continuous(expand = c(0,0)) +
  NoLegend() + RotatedAxis() +
  ylab(expression(italic(N)[DEGs])) + xlab('') +
  theme(
    panel.grid.minor=element_blank(),
    panel.grid.major.y=element_line(colour="lightgray", size=0.5),
  )

png('figures/basic_DEGs_barplot.png', width=9, height=4, res=300, units='in')
print(p)
dev.off()

# plot the top 3 DEGs per cluster as a heatmap:
top_DEGs <- cluster_markers %>%
  group_by(CellType_cluster) %>%
  top_n(3, wt=avg_logFC) %>%
  .$gene

png('figures/basic_DEGs_heatmap.png', width=10, height=10, res=300, units='in')
pdf('figures/basic_DEGs_heatmap.pdf', width=15, height=12, useDingbats=FALSE)
DoHeatmap(seurat_obj, features=top_DEGs, group.by='seurat_clusters', label=FALSE) + scale_fill_gradientn(colors=viridis(256)) + NoLegend()
dev.off()
```

```{r}
cds <- cluster_cells(cds)
plot_cells(cds, color_cells_by = "partition", group_cells_by = "partition", 
           group_label_size = 4)
```


```{r}
#monocle plot cells
plot_cells(cds, reduction_method = "PCA",
           color_cells_by = "cell_type", group_label_size = 3.5,
           label_groups_by_cluster = FALSE) +
  scale_color_d3(palette = "category20b")
# Seed for random initiation of UMAP
set.seed(4837)
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", init = "random")
plot_cells(cds, color_cells_by = "cell_type", group_label_size = 3.5,
           label_groups_by_cluster = FALSE) +
  scale_color_d3(palette = "category20b")
```

```{r}
plot_cells(cds, color_cells_by = "cluster", group_cells_by = "cluster", 
           group_label_size = 4)
```

```{r}
#Monocle 3’s trajectory inference is inspired by PAGA.
cds <- learn_graph(cds, verbose = FALSE, 
                   learn_graph_control = list(minimal_branch_len = 7,
                                              geodesic_distance_ratio = 0.5))
```

```{r}
plot_cells(cds, color_cells_by = "cell_type", label_groups_by_cluster = FALSE,
           group_label_size = 3.5, graph_label_size = 2) +
  scale_color_d3(palette = "category20b")
```

```{r}

```

