Figure_1_data_analysis

#This script inputs six rds files (3 injured, 3 uninjured) of lumbar- projecting neurons. #It performs QC, combines and normalizes the files, then assigns and identity to clusters based on marker genes.

library('Seurat')

## Loading required package: SeuratObject

## Loading required package: sp

## 'SeuratObject' was built under R 4.3.2 but the current version is
## 4.3.3; it is recomended that you reinstall 'SeuratObject' as the ABI
## for R may have changed

## 'SeuratObject' was built with package 'Matrix' 1.6.4 but the current
## version is 1.6.5; it is recomended that you reinstall 'SeuratObject' as
## the ABI for 'Matrix' may have changed

## 
## Attaching package: 'SeuratObject'

## The following object is masked from 'package:base':
## 
##     intersect

library('ggplot2')
library('plyr')
library('dplyr')

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library('viridis')

## Loading required package: viridisLite

library('ggthemes')
library('plotly')

## 
## Attaching package: 'plotly'

## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library('cowplot')

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:ggthemes':
## 
##     theme_map

library('scCustomize')

## scCustomize v2.1.2
## If you find the scCustomize useful please cite.
## See 'samuel-marsh.github.io/scCustomize/articles/FAQ.html' for citation info.

set.seed(291)

#Load the Input data

Sample1.data <- Read10X(data.dir = "1_SourceFiles/LU2/")
Sample1 <- CreateSeuratObject(counts = Sample1.data, project = "Sample1", min.cells = 3, min.features = 1000)
Sample1

## An object of class Seurat 
## 24164 features across 2679 samples within 1 assay 
## Active assay: RNA (24164 features, 0 variable features)
##  1 layer present: counts

VlnPlot(Sample1, features = c("nFeature_RNA"))

## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

Sample1 <- subset(Sample1, subset = nFeature_RNA > 1000 & nFeature_RNA < 6000)
Sample1

## An object of class Seurat 
## 24164 features across 2654 samples within 1 assay 
## Active assay: RNA (24164 features, 0 variable features)
##  1 layer present: counts

Sample2.data <- Read10X(data.dir = "1_SourceFiles/LU3")
Sample2 <- CreateSeuratObject(counts = Sample2.data, project = "Sample2",min.cells = 3, min.features = 1000)
Sample2

## An object of class Seurat 
## 24763 features across 2770 samples within 1 assay 
## Active assay: RNA (24763 features, 0 variable features)
##  1 layer present: counts

VlnPlot(Sample2, features = c("nFeature_RNA"))

## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

Sample2 <- subset(Sample2, subset = nFeature_RNA > 1000 & nFeature_RNA < 8000)
Sample2

## An object of class Seurat 
## 24763 features across 2608 samples within 1 assay 
## Active assay: RNA (24763 features, 0 variable features)
##  1 layer present: counts

Sample3.data <- Read10X(data.dir = "1_SourceFiles/LU4")
Sample3 <- CreateSeuratObject(counts = Sample3.data, project = "Sample3", min.cells = 3, min.features = 1000)
Sample3

## An object of class Seurat 
## 25155 features across 3579 samples within 1 assay 
## Active assay: RNA (25155 features, 0 variable features)
##  1 layer present: counts

VlnPlot(Sample3, features = c("nFeature_RNA"))

## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

Sample3 <- subset(Sample3, subset = nFeature_RNA > 1000 & nFeature_RNA < 7000)
Sample3

## An object of class Seurat 
## 25155 features across 3518 samples within 1 assay 
## Active assay: RNA (25155 features, 0 variable features)
##  1 layer present: counts

Sample4.data <- Read10X(data.dir = "1_SourceFiles/LI1")
Sample4 <- CreateSeuratObject(counts = Sample4.data, project = "Sample4",min.cells = 3, min.features = 1000)
Sample4

## An object of class Seurat 
## 23188 features across 2778 samples within 1 assay 
## Active assay: RNA (23188 features, 0 variable features)
##  1 layer present: counts

VlnPlot(Sample4, features = c("nFeature_RNA"))

## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

Sample4 <- subset(Sample4, subset = nFeature_RNA > 1000 & nFeature_RNA < 5500)
Sample4

## An object of class Seurat 
## 23188 features across 2744 samples within 1 assay 
## Active assay: RNA (23188 features, 0 variable features)
##  1 layer present: counts

Sample5.data <- Read10X(data.dir = "1_SourceFiles/LI2")
Sample5 <- CreateSeuratObject(counts = Sample5.data, project = "Sample5", min.cells = 3, min.features = 1000)
Sample5

## An object of class Seurat 
## 23121 features across 2487 samples within 1 assay 
## Active assay: RNA (23121 features, 0 variable features)
##  1 layer present: counts

VlnPlot(Sample5, features = c("nFeature_RNA"))

## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

Sample5 <- subset(Sample5, subset = nFeature_RNA > 1000 & nFeature_RNA < 7500)
Sample5

## An object of class Seurat 
## 23121 features across 2470 samples within 1 assay 
## Active assay: RNA (23121 features, 0 variable features)
##  1 layer present: counts

Sample6.data <- Read10X(data.dir = "1_SourceFiles/LI3")
Sample6 <- CreateSeuratObject(counts = Sample6.data, project = "Sample6", min.cells = 3, min.features = 1000)
Sample6

## An object of class Seurat 
## 25137 features across 3143 samples within 1 assay 
## Active assay: RNA (25137 features, 0 variable features)
##  1 layer present: counts

VlnPlot(Sample6, features = c("nFeature_RNA"))

## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

Sample6 <- subset(Sample6, subset = nFeature_RNA > 1000 & nFeature_RNA < 7500)
Sample6

## An object of class Seurat 
## 25137 features across 3089 samples within 1 assay 
## Active assay: RNA (25137 features, 0 variable features)
##  1 layer present: counts

#Merge all the data set

LMN_merge.list <- list (Sample1, Sample2, Sample3, Sample4, Sample5, Sample6)

LMN_merge.list <- lapply(X=LMN_merge.list, FUN = function(x) {
  x <- NormalizeData(x)
  x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

## Normalizing layer: counts

## Finding variable features for layer counts

## Normalizing layer: counts

## Finding variable features for layer counts

## Normalizing layer: counts

## Finding variable features for layer counts

## Normalizing layer: counts

## Finding variable features for layer counts

## Normalizing layer: counts

## Finding variable features for layer counts

## Normalizing layer: counts

## Finding variable features for layer counts

features <- SelectIntegrationFeatures(object.list = LMN_merge.list)

LMN_merge.anchors <- FindIntegrationAnchors(object.list = LMN_merge.list,
                                            anchor.features = features)

## Warning in CheckDuplicateCellNames(object.list = object.list): Some cell names
## are duplicated across objects provided. Renaming to enforce unique cell names.

## Scaling features for provided objects

## Finding all pairwise anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 5827 anchors

## Filtering anchors

##  Retained 4718 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 7165 anchors

## Filtering anchors

##  Retained 5893 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 7272 anchors

## Filtering anchors

##  Retained 5342 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 6276 anchors

## Filtering anchors

##  Retained 5071 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 6196 anchors

## Filtering anchors

##  Retained 4665 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 7354 anchors

## Filtering anchors

##  Retained 5768 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 5807 anchors

## Filtering anchors

##  Retained 3840 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 6731 anchors

## Filtering anchors

##  Retained 4258 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 7313 anchors

## Filtering anchors

##  Retained 4945 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 6274 anchors

## Filtering anchors

##  Retained 4469 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 6486 anchors

## Filtering anchors

##  Retained 5521 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 6559 anchors

## Filtering anchors

##  Retained 5149 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 7690 anchors

## Filtering anchors

##  Retained 6328 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 6788 anchors

## Filtering anchors

##  Retained 5900 anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 6372 anchors

## Filtering anchors

##  Retained 4501 anchors

LMN <- IntegrateData(anchorset = LMN_merge.anchors)

## Merging dataset 1 into 3

## Extracting anchors for merged samples

## Finding integration vectors

## Finding integration vector weights

## Integrating data

## Merging dataset 4 into 6

## Extracting anchors for merged samples

## Finding integration vectors

## Finding integration vector weights

## Integrating data

## Merging dataset 6 4 into 3 1

## Extracting anchors for merged samples

## Finding integration vectors

## Finding integration vector weights

## Integrating data

## Merging dataset 2 into 3 1 6 4

## Extracting anchors for merged samples

## Finding integration vectors

## Warning: Different cells in new layer data than already exists for scale.data

## Finding integration vector weights

## Integrating data

## Merging dataset 5 into 3 1 6 4 2

## Extracting anchors for merged samples

## Finding integration vectors

## Warning: Different cells in new layer data than already exists for scale.data

## Finding integration vector weights

## Integrating data

DefaultAssay(LMN) <- "integrated"

LMN <- ScaleData(LMN, verbose = FALSE)

LMN <- RunPCA(LMN, npcs = 50, verbose = FALSE)

LMN <- RunUMAP(LMN, reduction = "pca", dims = 1:30)

## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session

## 14:19:37 UMAP embedding parameters a = 0.9922 b = 1.112

## 14:19:37 Read 17083 rows and found 30 numeric columns

## 14:19:37 Using Annoy for neighbor search, n_neighbors = 30

## 14:19:37 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 14:19:43 Writing NN index file to temp file /tmp/RtmpzrR6OD/file948141fbbdcc4
## 14:19:43 Searching Annoy index using 1 thread, search_k = 3000
## 14:19:50 Annoy recall = 100%
## 14:19:51 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 14:19:52 Initializing from normalized Laplacian + noise (using RSpectra)
## 14:19:54 Commencing optimization for 200 epochs, with 757542 positive edges
## 14:20:08 Optimization finished

LMN <- FindNeighbors(LMN, reduction = "pca", dims = 1:30)

## Computing nearest neighbor graph
## Computing SNN

LMN <- FindClusters(LMN, resolution = 0.5)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 17083
## Number of edges: 740491
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9202
## Number of communities: 27
## Elapsed time: 5 seconds

DefaultAssay(LMN) <- "RNA"

LMN

## An object of class Seurat 
## 28586 features across 17083 samples within 2 assays 
## Active assay: RNA (26586 features, 2000 variable features)
##  18 layers present: counts.Sample1, counts.Sample2, counts.Sample3, counts.Sample4, counts.Sample5, counts.Sample6, data.Sample1, scale.data.Sample1, data.Sample2, scale.data.Sample2, data.Sample3, scale.data.Sample3, data.Sample4, scale.data.Sample4, data.Sample5, scale.data.Sample5, data.Sample6, scale.data.Sample6
##  1 other assay present: integrated
##  2 dimensional reductions calculated: pca, umap

DimPlot(LMN, group.by = 'seurat_clusters', split.by = 'orig.ident',
        label = TRUE, label.size = 5) + NoLegend()

#Remove the clearly stressed, dying, or artificially low cell count clusters.

FeaturePlot(LMN, c("Ddit3"), label = TRUE, 
            label.size = 5, repel = TRUE, order = TRUE, split.by = "orig.ident")

## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

FeaturePlot(LMN, c("nFeature_RNA"), label = TRUE, 
            label.size = 5, repel = TRUE, order = TRUE)

VlnPlot(LMN, "nFeature_RNA", pt.size = 0) + NoLegend() +
  theme(axis.text.x = element_text(angle = 0, hjust = .5)) +
  theme(text = element_text(size = 20))

VlnPlot(LMN, "Ddit3", pt.size = 0) + NoLegend() +
  theme(axis.text.x = element_text(angle = 0, hjust = .5)) +
  theme(text = element_text(size = 20))

Idents(LMN) <- "seurat_clusters"

LMN1 <- subset(x = LMN, idents = c("4", "11", "12", "24","25"), invert = TRUE)

#A confidence check visualization to confirm proper removal

DimPlot(LMN1, group.by = 'seurat_clusters', split.by = 'orig.ident', label = TRUE, label.size = 5) + NoLegend()

#Renormalize the new dataset that lacks the dying and low-count clusters cells

DefaultAssay(LMN1) <- "RNA"

LMN1 <- NormalizeData(LMN1, normalization.method = "LogNormalize", scale.factor = 10000)

## Normalizing layer: counts.Sample1

## Normalizing layer: counts.Sample2

## Normalizing layer: counts.Sample3

## Normalizing layer: counts.Sample4

## Normalizing layer: counts.Sample5

## Normalizing layer: counts.Sample6

LMN1 <- FindVariableFeatures(LMN1, selection.method = "vst", nfeatures = 2000)

## Finding variable features for layer counts.Sample1

## Finding variable features for layer counts.Sample2

## Finding variable features for layer counts.Sample3

## Finding variable features for layer counts.Sample4

## Finding variable features for layer counts.Sample5

## Finding variable features for layer counts.Sample6

all.genes <- rownames(LMN1)

LMN1 <- ScaleData(LMN1, features = all.genes)

## Centering and scaling data matrix

LMN1 <- RunPCA(LMN1, features = VariableFeatures(object = LMN1), npcs = 50)

## PC_ 1 
## Positive:  Lsamp, Dcc, Rmst, Npas3, Kcnc2, Rbms3, 5330434G04Rik, Zbtb20, Inpp4b, Nhs 
##     Camk2d, Gria4, Lhfpl3, Grip1, Cdh18, Scn9a, Gm12239, Gnas, C530008M17Rik, Grb10 
##     Gm32647, Grin3a, Pcdh15, Kif26b, Btbd11, Glra1, Zfhx3, Ttc28, 6330411D24Rik, Nrg1 
## Negative:  Nfib, Celf2, Sox5, Camk2a, Mical2, Dpp10, Dscaml1, Pex5l, Slit3, Msra 
##     Nos1ap, Tox, Phactr1, Mef2c, Arpp21, Kalrn, Itpr1, Tspan5, Tcf4, Pde1a 
##     Cacna2d3, Sv2b, Khdrbs3, Grin2a, Tafa1, Ptk2b, Iqgap2, Gpm6b, Cap2, Cacnb4 
## PC_ 2 
## Positive:  Baiap3, B230216N24Rik, Nnat, Rgs9, Gck, Dlk1, Ros1, Gm45194, Rfx4, Gm4876 
##     Tmcc3, Trpm3, 6330403K07Rik, Plagl1, Cpne7, Hcrt, Lhx9, Ptprk, Gm2824, Rorb 
##     Gnasas1, Zim1, Stac, Hs3st4, Tead1, Rfx2, 4930548K13Rik, Lypd6, Trhr, 1700042O10Rik 
## Negative:  Tll1, Ttc6, Rreb1, Mitf, Lrrc4c, 4921518K17Rik, Syt2, Alk, D030068K23Rik, Emx2os 
##     D130079A08Rik, Unc13c, Zfp804b, Zmat4, Zfp385b, Chst9, 9030622O22Rik, Gm13629, 4930445B16Rik, Etl4 
##     Ano4, Tox3, D130009I18Rik, Colgalt2, Cdh18, Fam163a, Ndst4, Caln1, Emcn, Mme 
## PC_ 3 
## Positive:  B230216N24Rik, Pam, Plagl1, Gm2694, Ros1, Cpne4, Srgap1, Pde3a, Tll1, Gck 
##     Thsd7a, Nnat, D130079A08Rik, Ttc6, Klhl1, Nek7, Man1a, Lama3, Cachd1, Gm45194 
##     Rfx4, Gm27016, Rfx2, Slc2a13, Hcrt, Pcsk6, Phactr2, D130009I18Rik, 4930548K13Rik, Trhr 
## Negative:  Meis2, Reln, Pard3b, Epha6, Adamts2, Grik1, Sema5a, Tshz1, Lhfpl2, Pard3bos1 
##     Glra1, Car10, Crp, Gm12239, Onecut1, Nrp2, Zbtb20, Ebf3, Sema6d, Gm12236 
##     Lrrtm4, Mgat4c, Cdh7, Rtl4, Nxph1, Lncenc1, Pbx3, Gm10475, Pde4b, Dchs2 
## PC_ 4 
## Positive:  Alcam, Ebf1, Gpc6, Prdm6, Hs6st3, Zfp521, Hs3st4, Dgkb, Gsg1l, Tafa2 
##     Rnf220, Plppr1, Tmeff2, Pde11a, Gpc5, Cntnap5b, Svep1, Nkain3, Spon1, Gm20713 
##     Egfem1, Dach1, Gm32828, Plcl1, Lin7a, Calcr, Lhfpl3, Meis2, Ptchd4, Cdh12 
## Negative:  Hcrt, Lypd6, Ros1, Plagl1, Zfhx3, Rfx4, B230216N24Rik, Pard3b, Tshz2, Rfx2 
##     Sox1ot, 4930548K13Rik, Reln, Gm867, Nnat, Insyn2b, Crp, Sema5a, Tshz1, Gm2694 
##     1700123O12Rik, Lama3, Trhr, D030068K23Rik, Zfp804b, Gm5149, Nek7, Chn2, Gm28376, Lhfpl2 
## PC_ 5 
## Positive:  Mgat4c, Pard3b, Kcnd3, Epha6, Adarb2, Lypd6, Reln, Zfhx3, Lrrtm4, Sema5a 
##     Shisa9, Galntl6, Tshz2, Schip1, Pcdh15, Syt1, Prr16, Sema6d, Lncenc1, Insyn2b 
##     Cacna1e, Dock2, Dpyd, Pard3bos1, Ndst4, Tshz1, Kcnip1, Gm15398, March1, Col8a1 
## Negative:  Dach1, Ebf1, Prdm6, Spx, Gpc5, Gm20713, Erbb4, Pde11a, Cdh12, Stk32b 
##     Crh, Shroom4, Svep1, Alcam, Met, Dgkk, 4930509J09Rik, Npas1, Nkain3, Gm21847 
##     Gsg1l, Adam12, Tmeff2, Gm30613, Gm32828, Asb4, Trhde, Plcl1, Kcns3, Hs6st3

LMN1 <- FindNeighbors(LMN1, dims = 1:30)

## Computing nearest neighbor graph

## Computing SNN

LMN1 <- FindClusters(LMN1, resolution = 0.5)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 14894
## Number of edges: 517256
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9155
## Number of communities: 22
## Elapsed time: 4 seconds

LMN1 <- RunUMAP(LMN1, dims = 1:30)

## 14:23:29 UMAP embedding parameters a = 0.9922 b = 1.112

## 14:23:29 Read 14894 rows and found 30 numeric columns

## 14:23:29 Using Annoy for neighbor search, n_neighbors = 30

## 14:23:29 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 14:23:34 Writing NN index file to temp file /tmp/RtmpzrR6OD/file9481410ef0261
## 14:23:34 Searching Annoy index using 1 thread, search_k = 3000
## 14:23:40 Annoy recall = 100%
## 14:23:41 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 14:23:43 Initializing from normalized Laplacian + noise (using RSpectra)
## 14:23:44 Commencing optimization for 200 epochs, with 621738 positive edges
## 14:23:56 Optimization finished

LMN1

## An object of class Seurat 
## 28586 features across 14894 samples within 2 assays 
## Active assay: RNA (26586 features, 2000 variable features)
##  19 layers present: counts.Sample1, counts.Sample2, counts.Sample3, counts.Sample4, counts.Sample5, counts.Sample6, data.Sample1, scale.data.Sample1, data.Sample2, scale.data.Sample2, data.Sample3, scale.data.Sample3, data.Sample4, scale.data.Sample4, data.Sample5, scale.data.Sample5, data.Sample6, scale.data.Sample6, scale.data
##  1 other assay present: integrated
##  2 dimensional reductions calculated: pca, umap

FeaturePlot(LMN1, ("Gad2"),label = TRUE, label.size = 3,repel = FALSE, order = TRUE, ) #500 x 1000

DimPlot(LMN1, group.by = 'seurat_clusters',  label = TRUE, label.size = 5) + NoLegend()

DotPlot(LMN1, features = c("Mef2c", "Camk2a", "Ptk2b", "Satb2", 
                           "Crym", "Fezf2", "Bcl6", "Slc30a3", "Sulf1"),
        dot.scale = 4, dot.min = 0) + coord_flip() +
  theme(axis.text.x = element_text(angle = 0, hjust = .5),
        axis.text = element_text(size = 10))

VlnPlot(LMN1, "Satb2", pt.size = 0) + NoLegend() +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  theme(text = element_text(size = 15),
        axis.text = element_text(size = 15))

Idents(LMN1) <- "seurat_clusters"
levels(LMN1)

##  [1] "0"  "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14"
## [16] "15" "16" "17" "18" "19" "20" "21"

LMN_Final <- subset(LMN1, idents = c("10","14","18"), invert = TRUE)

#Renormalize and cluster LMN_Final now that cluster 10,14 and 18 are removed (probably won't change much)

DefaultAssay(LMN_Final) <- "RNA"

LMN_Final <- NormalizeData(LMN_Final, normalization.method = "LogNormalize", scale.factor = 10000)

## Normalizing layer: counts.Sample1

## Normalizing layer: counts.Sample2

## Normalizing layer: counts.Sample3

## Normalizing layer: counts.Sample4

## Normalizing layer: counts.Sample5

## Normalizing layer: counts.Sample6

LMN_Final <- FindVariableFeatures(LMN_Final, selection.method = "vst", nfeatures = 2000)

## Finding variable features for layer counts.Sample1

## Finding variable features for layer counts.Sample2

## Finding variable features for layer counts.Sample3

## Finding variable features for layer counts.Sample4

## Finding variable features for layer counts.Sample5

## Finding variable features for layer counts.Sample6

all.genes <- rownames(LMN_Final)

LMN_Final <- ScaleData(LMN_Final, features = all.genes)

## Centering and scaling data matrix

LMN_Final <- RunPCA(LMN_Final, features = VariableFeatures(object = LMN_Final), npcs = 50)

## PC_ 1 
## Positive:  Nfib, Celf2, Sox5, Dscaml1, Mical2, Camk2a, Slit3, Pex5l, Dpp10, Msra 
##     Nos1ap, Tox, Mef2c, Phactr1, Arpp21, Itpr1, Kalrn, Pde1a, Tcf4, Tspan5 
##     Sv2b, Cacna2d3, Khdrbs3, Ptk2b, Grin2a, Iqgap2, Tafa1, Cap2, Gpm6b, Cacnb4 
## Negative:  Dcc, Lsamp, Rmst, Npas3, Kcnc2, Rbms3, 5330434G04Rik, Zbtb20, Inpp4b, Nhs 
##     Gria4, Camk2d, Lhfpl3, Grip1, Cdh18, Gnas, Scn9a, Gm12239, C530008M17Rik, Grb10 
##     Gm32647, Grin3a, Pcdh15, Nrg1, Kif26b, Btbd11, Ttc28, Glra1, Zfhx3, Vwc2l 
## PC_ 2 
## Positive:  Tll1, Ttc6, Rreb1, Mitf, Lrrc4c, Syt2, 4921518K17Rik, D030068K23Rik, Alk, Emx2os 
##     Unc13c, D130079A08Rik, Zfp804b, Zfp385b, Etl4, Zmat4, Chst9, 9030622O22Rik, 4930445B16Rik, Gm13629 
##     Ano4, Tox3, D130009I18Rik, Cdh18, Colgalt2, Fam163a, Zfpm2, Emcn, Cntnap4, Caln1 
## Negative:  Baiap3, B230216N24Rik, Nnat, Rgs9, Gck, Ros1, Dlk1, Rfx4, Gm45194, Gm4876 
##     Tmcc3, Plagl1, 6330403K07Rik, Trpm3, Cpne7, Hcrt, Ptprk, Rorb, Lhx9, Gm2824 
##     Rfx2, Gnasas1, Stac, Zim1, 4930548K13Rik, Tead1, Trhr, Gabrg3, Nek7, Lypd6 
## PC_ 3 
## Positive:  Reln, Pard3b, Epha6, Grik1, Adamts2, Sema5a, Tshz1, Lhfpl2, Pard3bos1, Car10 
##     Crp, Nrp2, Sema6d, Zbtb20, Onecut1, Glra1, Lrrtm4, Pde4b, Ebf3, Lncenc1 
##     Gm12239, Mgat4c, Rtl4, Cdh7, Gm10475, Arsj, Gm12236, Prkg2, Nxph1, Lypd6b 
## Negative:  B230216N24Rik, Pam, Plagl1, Gm2694, Tll1, Cpne4, Ros1, Pde3a, Srgap1, Thsd7a 
##     Ttc6, Gck, D130079A08Rik, Man1a, Nnat, Klhl1, Lama3, Cachd1, Gm27016, Nek7 
##     Rfx4, Gm45194, Rfx2, Hcrt, Slc2a13, D130009I18Rik, 4921518K17Rik, 4930548K13Rik, Trhr, Pcsk6 
## PC_ 4 
## Positive:  Ebf1, Prdm6, Dach1, Alcam, Gpc5, Spx, Gm20713, Pde11a, Gpc6, Svep1 
##     Hs3st4, Gsg1l, Hs6st3, Cdh12, Tmeff2, Zfp521, Stk32b, Shroom4, Nkain3, Gm32828 
##     Erbb4, Tafa2, Plcl1, Dgkk, Trhde, Crh, Rnf220, Calcr, Spon1, 4930509J09Rik 
## Negative:  Lypd6, Zfhx3, Pard3b, Reln, Tshz2, Sema5a, Mgat4c, Tshz1, Shisa9, Insyn2b 
##     Ros1, Plagl1, Rgs6, Lrrtm4, Crp, Pard3bos1, B230216N24Rik, Hcrt, Rfx4, Lhfpl2 
##     Dock2, Zfp804b, Sema6d, Pcdh15, Rfx2, Nnat, Arsj, Gm15398, 4930548K13Rik, Lama3 
## PC_ 5 
## Positive:  Tfap2b, Adarb2, Fgf13, Mctp1, Unc5d, Ntng1, Mgat4c, Arhgap6, Prr16, Slc6a2 
##     Slc18a2, Dgkb, Daam2, Pde4b, Dbh, C130073E24Rik, Ankfn1, Maoa, Epha6, Kcnab1 
##     Sorcs3, Lrp1b, 4930555F03Rik, Galntl6, Th, Gm45321, Kcnd3, Uncx, A830018L16Rik, Tenm3 
## Negative:  Hcrt, Plagl1, Ros1, Rorb, Lhx9, C1ql3, Dach1, Gm45194, Rfx4, Gm21798 
##     4930548K13Rik, Rfx2, Sox1ot, Gm867, Prdm6, Trhr, Scg2, Adcy8, Nek7, Ebf1 
##     Spx, Gpc5, Stac, 1700123O12Rik, Olfm3, Prkg1, Gm20713, Csta2, Dgkk, Epha3

LMN_Final <- FindNeighbors(LMN_Final, dims = 1:30)

## Computing nearest neighbor graph

## Computing SNN

LMN_Final <- FindClusters(LMN_Final, resolution = 0.5)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 14161
## Number of edges: 493314
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9123
## Number of communities: 20
## Elapsed time: 3 seconds

LMN_Final <- RunUMAP(LMN_Final, dims = 1:30)

## 14:26:51 UMAP embedding parameters a = 0.9922 b = 1.112

## 14:26:51 Read 14161 rows and found 30 numeric columns

## 14:26:51 Using Annoy for neighbor search, n_neighbors = 30

## 14:26:51 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 14:26:55 Writing NN index file to temp file /tmp/RtmpzrR6OD/file9481452b84b86
## 14:26:55 Searching Annoy index using 1 thread, search_k = 3000
## 14:27:01 Annoy recall = 100%
## 14:27:02 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 14:27:03 Found 3 connected components, falling back to 'spca' initialization with init_sdev = 1
## 14:27:03 Using 'irlba' for PCA
## 14:27:03 PCA: 2 components explained 55.28% variance
## 14:27:03 Scaling init to sdev = 1
## 14:27:04 Commencing optimization for 200 epochs, with 586712 positive edges
## 14:27:14 Optimization finished

LMN_Final

## An object of class Seurat 
## 28586 features across 14161 samples within 2 assays 
## Active assay: RNA (26586 features, 2000 variable features)
##  19 layers present: counts.Sample1, counts.Sample2, counts.Sample3, counts.Sample4, counts.Sample5, counts.Sample6, data.Sample1, scale.data.Sample1, data.Sample2, scale.data.Sample2, data.Sample3, scale.data.Sample3, data.Sample4, scale.data.Sample4, data.Sample5, scale.data.Sample5, data.Sample6, scale.data.Sample6, scale.data
##  1 other assay present: integrated
##  2 dimensional reductions calculated: pca, umap

DimPlot(LMN_Final, group.by = 'seurat_clusters', split.by = 'orig.ident',
        label = TRUE, label.size = 3) + NoLegend()

DimPlot(LMN_Final, group.by = 'seurat_clusters',
        label = TRUE, label.size = 5) + NoLegend()

VlnPlot(LMN_Final, "Rmst", pt.size = 0) + NoLegend() +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  theme(text = element_text(size = 15),
        axis.text = element_text(size = 15)) #Sized for display

DotPlot(LMN_Final, features = c("Mef2c", "Camk2a", "Ptk2b", "Satb2", 
                           "Crym", "Fezf2", "Bcl6", "Slc30a3", "Sulf1"),
        dot.scale = 4, dot.min = 0) + coord_flip() +
  theme(axis.text.x = element_text(angle = 0, hjust = .5),
        axis.text = element_text(size = 10)) #Sized for display

FeaturePlot(LMN_Final, ("Camk2a"), 
            label = TRUE, label.size = 3,
            repel = FALSE, order = TRUE, ) #500 x 1000

LMN_Final_DotPlot1 <- read.csv("2_Input_Lists/LMN_Final_DotPlot1.csv", sep = ",")
Manoj_CST_marker = read.csv("Manoj_marklist.csv")
Idents(LMN_Final)  <- "seurat_clusters"

DefaultAssay(LMN_Final) <- "RNA"

DotPlot(LMN_Final, features = Manoj_CST_marker$Marker, dot.scale = 9, dot.min = 0) +
  coord_flip() +  theme(axis.text.x = element_text(angle = 0, hjust = .5),  axis.text = element_text(size = 10)) #Sized for display

FeaturePlot(LMN_Final, c("Sox14"), 
            label = TRUE, label.size = 3,
            repel = FALSE, order = TRUE, )

DimPlot_scCustom(seurat_object = LMN_Final,figure_plot = TRUE, label = T)

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

saveRDS(LMN_Final,"LMN_Final_Manoj.rds")

FeaturePlot(LMN_Final, c("Sox14"),label = TRUE, label.size = 4,
            repel = FALSE, order = TRUE, ) #500 x 1000

VlnPlot(LMN_Final, "Ebf2", pt.size = 0) + NoLegend() +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  theme(text = element_text(size = 15),
        axis.text = element_text(size = 15)) #Sized for display

LMN_Final= readRDS("LMN_Final_Manoj.rds")
LMN_Final_Map1 <- read.csv("CST_cluster_markers.csv",
                           sep = ",")

Idents(LMN_Final) <- "seurat_clusters"

levels(LMN_Final)

##  [1] "0"  "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14"
## [16] "15" "16" "17" "18" "19"

LMN_Final@meta.data$manual.clusters.1 <- plyr::mapvalues(
  x = LMN_Final@meta.data$seurat_clusters,
  from = LMN_Final_Map1$seurat_clusters,
  to = LMN_Final_Map1$manual.cluster.1)

Idents(LMN_Final) <- "manual.clusters.1"

levels(LMN_Final)

##  [1] "CST"           "HB-2-Pard3b"   "DP"            "RN"           
##  [5] "HB-6-Other"    "HB-4-Nox4"     "HB-3-Col19a1"  "HB-1-Daam2"   
##  [9] "PVH"           "HB-5-Pappa"    "LH"            "Raphe / Inhib"
## [13] "LC"            "PAG"           "EW"

DimPlot_scCustom(seurat_object = LMN_Final,figure_plot = TRUE, label = T)

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

LMN_Final@meta.data$seurat_clusters <- factor(
  x = LMN_Final@meta.data$seurat_clusters,
  levels = LMN_Final_Map1$seurat_clusters)

Idents(LMN_Final) <- "seurat_clusters"
levels(LMN_Final)

##  [1] "0"  "1"  "4"  "11" "13" "5"  "17" "16" "15" "3"  "14" "10" "2"  "9"  "7" 
## [16] "12" "8"  "6"  "18" "19"

DimPlot_scCustom(seurat_object = LMN_Final,figure_plot = TRUE, label = T)

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

LMN_Final_Order1 <- read.csv("2_Input_Lists/LMN_Final_Order1.csv",
                             sep = ",")

LMN_Final@meta.data$manual.clusters.1 <- factor(
  x = LMN_Final@meta.data$manual.clusters.1,
  levels = LMN_Final_Order1$Order)

Idents(LMN_Final) <- "manual.clusters.1"
levels(LMN_Final)

##  [1] "CST"           "PVH"           "LH"            "RN"           
##  [5] "EW"            "PAG"           "LC"            "DP"           
##  [9] "Raphe / Inhib" "HB-1-Daam2"    "HB-2-Pard3b"   "HB-3-Col19a1" 
## [13] "HB-4-Nox4"     "HB-5-Pappa"    "HB-6-Other"

DimPlot_scCustom(seurat_object = LMN_Final,figure_plot = TRUE, label = T)

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

DefaultAssay(LMN_Final) <- "RNA"

LMN_Final_Map2 <- read.csv("2_Input_Lists/LMN_Final_Map2.csv",sep = ",")

Idents(LMN_Final) <- "orig.ident"
levels(LMN_Final)

## [1] "Sample1" "Sample2" "Sample3" "Sample4" "Sample5" "Sample6"

LMN_Final@meta.data$injury.status <- plyr::mapvalues(
  x = LMN_Final@meta.data$orig.ident,
  from = LMN_Final_Map2$orig.ident,
  to = LMN_Final_Map2$injury.status)

Idents(LMN_Final) <- "injury.status"
levels(LMN_Final)

## [1] "uninjured" "injured"

DimPlot_scCustom(seurat_object = LMN_Final,figure_plot = TRUE, label = T)

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

LMN_Final_Map1 <- read.csv("CST_cluster_markers.csv", sep = ",")

Idents(LMN_Final) <- "seurat_clusters"
levels(LMN_Final)

##  [1] "0"  "1"  "4"  "11" "13" "5"  "17" "16" "15" "3"  "14" "10" "2"  "9"  "7" 
## [16] "12" "8"  "6"  "18" "19"

LMN_Final@meta.data$Cort_v_Subcort <- plyr::mapvalues(
  x = LMN_Final@meta.data$seurat_clusters,
  from = LMN_Final_Map1$seurat_clusters,
  to = LMN_Final_Map1$Cort_v_Subcort)

Idents(LMN_Final) <- "Cort_v_Subcort"
levels(LMN_Final)

## [1] "Cortical"    "Subcortical"

DimPlot_scCustom(seurat_object = LMN_Final,figure_plot = TRUE, label = T)

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Idents(LMN_Final) <- "seurat_clusters"
levels(LMN_Final)

##  [1] "0"  "1"  "4"  "11" "13" "5"  "17" "16" "15" "3"  "14" "10" "2"  "9"  "7" 
## [16] "12" "8"  "6"  "18" "19"

DimPlot_scCustom(seurat_object = LMN_Final,figure_plot = TRUE, label = T)

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

LMN_Final@meta.data$manual.clusters.2 <- plyr::mapvalues(
  x = LMN_Final@meta.data$seurat_clusters,
  from = LMN_Final_Map1$seurat_clusters,
  to = LMN_Final_Map1$manual.cluster.2)

Idents(LMN_Final) <- "manual.clusters.2"
levels(LMN_Final)

## [1] "CST"   "HYP"   "RN"    "Other" "DP"    "MED"

DimPlot_scCustom(seurat_object = LMN_Final,figure_plot = TRUE, label = T)

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

LMN_Final_Order2 <- read.csv("2_Input_Lists/LMN_Final_Order2.csv",sep = ",")

LMN_Final@meta.data$manual.clusters.2 <- factor(
  x = LMN_Final@meta.data$manual.clusters.2,
  levels = LMN_Final_Order2$Order)

Idents(LMN_Final) <- "manual.clusters.2"
levels(LMN_Final)

## [1] "CST"   "HYP"   "RN"    "DP"    "MED"   "Other"

DimPlot_scCustom(seurat_object = LMN_Final,figure_plot = TRUE, label = T)

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

LMN_Final <- JoinLayers(LMN_Final)

LMN_CST_InjVUninj <- FindMarkers(LMN_Final,
                                 subset.ident = "CST",
                                 group.by = "injury.status",
                                 ident.1 = "injured",
                                 ident.2 = "uninjured",
                                 logfc.threshold = 0,
                                 min.pct = 0.001)


write.csv(x = LMN_CST_InjVUninj, 
          file = "LMN_CST_InjvUninj.csv",
          quote = FALSE)


LMN_HYP_InjVUninj <- FindMarkers(LMN_Final,
                                 subset.ident = "HYP",
                                 group.by = "injury.status",
                                 ident.1 = "injured",
                                 ident.2 = "uninjured",
                                 logfc.threshold = 0,
                                 min.pct = 0.001)


write.csv(x = LMN_HYP_InjVUninj, 
          file = "LMN_HYP_InjvUninj.csv",
          quote = FALSE)

LMN_RN_InjVUninj <- FindMarkers(LMN_Final,
                                 subset.ident = "RN",
                                 group.by = "injury.status",
                                 ident.1 = "injured",
                                 ident.2 = "uninjured",
                                 logfc.threshold = 0,
                                 min.pct = 0.001)


write.csv(x = LMN_RN_InjVUninj, 
          file = "LMN_RN_InjvUninj.csv",
          quote = FALSE)


LMN_DP_InjVUninj <- FindMarkers(LMN_Final,
                                subset.ident = "DP",
                                group.by = "injury.status",
                                ident.1 = "injured",
                                ident.2 = "uninjured",
                                logfc.threshold = 0,
                                min.pct = 0.001)


write.csv(x = LMN_DP_InjVUninj, 
          file = "LMN_DP_InjvUninj.csv",
          quote = FALSE)

LMN_MED_InjVUninj <- FindMarkers(LMN_Final,
                                subset.ident = "MED",
                                group.by = "injury.status",
                                ident.1 = "injured",
                                ident.2 = "uninjured",
                                logfc.threshold = 0,
                                min.pct = 0.001)


write.csv(x = LMN_MED_InjVUninj, 
          file = "LMN_MED_InjvUninj.csv",
          quote = FALSE)

LMN_ALL_InjVUninj <- FindMarkers(LMN_Final,
                                 group.by = "injury.status",
                                 ident.1 = "injured",
                                 ident.2 = "uninjured",
                                 logfc.threshold = 0,
                                 min.pct = 0.001)


write.csv(x = LMN_ALL_InjVUninj, 
          file = "LMN_ALL_InjvUninj.csv",
          quote = FALSE)

DimPlot_scCustom(seurat_object = LMN_Final,figure_plot = TRUE, label = T)

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Figure_1_data_analysis

2024-01-05