load in 2 samples of each condition so that this code can run on my computer
data_dir ="/Users/rorihoover/Desktop/BME494HW/Eutopic/sample8/outs/filtered_feature_bc_matrix/"
data <- Read10X(data.dir = data_dir)
Eutopic_seurat = CreateSeuratObject(counts = data)
data_dir ="/Users/rorihoover/Desktop/BME494HW/Eutopic/sample24/outs/filtered_feature_bc_matrix/"
data <- Read10X(data.dir = data_dir)
seurat_object = CreateSeuratObject(counts = data)
Eutopic_seurat<- merge(Eutopic_seurat, y = seurat_object)
## Warning: Some cell names are duplicated across objects provided. Renaming to
## enforce unique cell names.
Eutopic_seurat<- merge(Eutopic_seurat, y = seurat_object)
rm(seurat_object)
rm(data)
#saveRDS(Eutopic_seurat, file = "Eutopic_seurat.rds")
Length<- length(Cells(Eutopic_seurat))
condition <- list(rep("Eutopic_Endometrium", Length))
meta_data <- data.frame(condition)
rownames(meta_data)<- Cells(Eutopic_seurat)
Eutopic_seurat<- AddMetaData(
object = Eutopic_seurat,
metadata = meta_data,
col.name = 'condition'
)
head(x = Eutopic_seurat[[]])
## orig.ident nCount_RNA nFeature_RNA condition
## AAACCCAAGCGTGCCT-1_1 SeuratProject 1443 551 Eutopic_Endometrium
## AAACCCAAGTTACGTC-1_1 SeuratProject 3404 950 Eutopic_Endometrium
## AAACCCACAACCCTCT-1_1 SeuratProject 1064 408 Eutopic_Endometrium
## AAACCCACATCACGGC-1_1 SeuratProject 4377 1301 Eutopic_Endometrium
## AAACCCAGTAAGTTGA-1_1 SeuratProject 1481 552 Eutopic_Endometrium
## AAACCCAGTATTCCGA-1_1 SeuratProject 9648 2430 Eutopic_Endometrium
data_dir ="/Users/rorihoover/Desktop/BME494HW/endo/sample5/outs/filtered_feature_bc_matrix/"
data <- Read10X(data.dir = data_dir)
Endo_seurat = CreateSeuratObject(counts = data)
data_dir ="/Users/rorihoover/Desktop/BME494HW/endo/sample16/outs/filtered_feature_bc_matrix/"
data <- Read10X(data.dir = data_dir)
seurat_object = CreateSeuratObject(counts = data)
Endo_seurat<- merge(Endo_seurat, y = seurat_object)
## Warning: Some cell names are duplicated across objects provided. Renaming to
## enforce unique cell names.
rm(seurat_object)
rm(data)
#saveRDS(Endo_seurat, file = "Endo_seurat.rds")
Length<- length(Cells(Endo_seurat))
condition <- list(rep("Endometriosis", Length))
meta_data <- data.frame(condition)
rownames(meta_data)<- Cells(Endo_seurat)
Endo_seurat<- AddMetaData(
object = Endo_seurat,
metadata = meta_data,
col.name = 'condition'
)
rm(meta_data)
rm(Length)
rm(condition)
seurat<- merge(Endo_seurat, y = Eutopic_seurat)
## Warning: Some cell names are duplicated across objects provided. Renaming to
## enforce unique cell names.
rm(Endo_seurat)
rm(Eutopic_seurat)
head(x = seurat[[]])
## orig.ident nCount_RNA nFeature_RNA condition
## AAACCCAAGGATATGT-1_1_1 SeuratProject 5091 2108 Endometriosis
## AAACCCACAGTCCCGA-1_1_1 SeuratProject 3688 862 Endometriosis
## AAACGAACAGAGGAAA-1_1_1 SeuratProject 4693 1321 Endometriosis
## AAACGAACATACCGTA-1_1_1 SeuratProject 611 353 Endometriosis
## AAACGAATCACAAGAA-1_1_1 SeuratProject 1264 359 Endometriosis
## AAACGCTCACTGGAAG-1_1_1 SeuratProject 3379 1067 Endometriosis
seurat[["percent.mt"]] <- PercentageFeatureSet(seurat, pattern = "^MT-")
VlnPlot(seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "percent.mt")+
geom_hline(yintercept = 5, linetype = "dashed", color = "black")
FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")+
geom_hline(yintercept = 2500, linetype = "dashed", color = "black")+
geom_hline(yintercept = 200, linetype = "dashed", color = "black")
seurat <- subset(seurat, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
seurat[["percent.mt"]] <- PercentageFeatureSet(seurat, pattern = "^MT-")
VlnPlot(seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
seurat <- RunAzimuth(seurat, reference = "pbmcref")
## Warning: Overwriting miscellanous data for model
## Warning: Adding a dimensional reduction (refUMAP) without the associated assay
## being present
## Warning: Adding a dimensional reduction (refUMAP) without the associated assay
## being present
## detected inputs from HUMAN with id type Gene.name
## reference rownames detected HUMAN with id type Gene.name
## Normalizing query using reference SCT model
## Warning: No layers found matching search pattern provided
## Projecting cell embeddings
## Finding query neighbors
## Finding neighborhoods
## Finding anchors
## Found 3392 anchors
## Finding integration vectors
## Finding integration vector weights
## Predicting cell labels
## Predicting cell labels
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Predicting cell labels
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
##
## Integrating dataset 2 with reference dataset
## Finding integration vectors
## Integrating data
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from integrated_dr_ to integrateddr_
## Computing nearest neighbors
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## Running UMAP projection
## Warning in RunUMAP.default(object = neighborlist, reduction.model =
## reduction.model, : Number of neighbors between query and reference is not equal
## to the number of neighbors within reference
## 21:43:46 Read 6303 rows
## 21:43:46 Processing block 1 of 1
## 21:43:46 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 21:43:46 Initializing by weighted average of neighbor coordinates using 1 thread
## 21:43:46 Commencing optimization for 67 epochs, with 126060 positive edges
## 21:43:48 Finished
## Warning: No assay specified, setting assay as RNA by default.
## Projecting reference PCA onto query
## Finding integration vector weights
## Projecting back the query cells into original PCA space
## Finding integration vector weights
## Computing scores:
## Finding neighbors of original query cells
## Finding neighbors of transformed query cells
## Computing query SNN
## Determining bandwidth and computing transition probabilities
## Total elapsed time: 6.29784488677979
seurat
## An object of class Seurat
## 33633 features across 6303 samples within 4 assays
## Active assay: RNA (33538 features, 0 variable features)
## 5 layers present: counts.1.1, counts.1.1.2, counts.2.1, counts.2.2, counts.2.1.2
## 3 other assays present: prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
## 2 dimensional reductions calculated: integrated_dr, ref.umap
seurat <- NormalizeData(seurat)
## Normalizing layer: counts.1.1
## Normalizing layer: counts.1.1.2
## Normalizing layer: counts.2.1
## Normalizing layer: counts.2.2
## Normalizing layer: counts.2.1.2
seurat <- FindVariableFeatures(seurat)
## Finding variable features for layer counts.1.1
## Finding variable features for layer counts.1.1.2
## Finding variable features for layer counts.2.1
## Finding variable features for layer counts.2.2
## Finding variable features for layer counts.2.1.2
seurat <- ScaleData(seurat)
## Centering and scaling data matrix
seurat <- RunPCA(seurat)
## PC_ 1
## Positive: DCN, COL1A2, COL6A2, COL1A1, C1S, MGP, C1R, CCDC80, SERPINF1, LUM
## COL14A1, CALD1, COL6A1, SPARCL1, SERPING1, COL6A3, COL3A1, SPARC, OGN, FBLN1
## MMP2, C3, AEBP1, RARRES2, GSN, SELENOP, MFAP4, LRP1, TIMP2, IGFBP5
## Negative: PTPRC, SRGN, HCST, CD48, CD52, LAPTM5, LCP1, CORO1A, CXCR4, CD37
## IL32, CD3D, CD2, SH3BGRL3, GMFG, CD3E, TRBC2, TRAC, CYBA, CD53
## CCL5, LTB, SAMSN1, CYTIP, FYB1, EVI2B, CD69, ITGB2, IL7R, GZMA
## PC_ 2
## Positive: IL32, CD3D, TRAC, CD3E, CD2, TRBC2, HBB, CCL5, HBA1, HBA2
## CD3G, LTB, GZMA, TRBC1, GZMM, CD52, IL7R, GZMK, CST7, CD247
## CTSW, KLRB1, SLPI, SPOCK2, CD7, SKAP1, NKG7, IKZF3, WFDC2, TBC1D10C
## Negative: LYZ, FCN1, S100A9, IL1B, S100A8, CXCL8, EREG, MNDA, BCL2A1, S100A12
## CCL20, CLEC4E, PLAUR, FCER1G, IL1A, AIF1, CTSS, CYBB, C5AR1, CLEC7A
## TYROBP, CFP, OLR1, PHACTR1, CD14, KYNU, TNFAIP6, SERPINB2, LST1, AQP9
## PC_ 3
## Positive: MALAT1, PTPRC, CXCR4, IL32, ZFP36L2, CD3D, CD2, TSC22D3, CD3E, SRSF7
## NEAT1, CCL5, CST7, CD69, GZMA, CD52, TRBC2, TRAC, MCL1, CYTIP
## CREM, CEMIP2, ARL4C, HSP90AA1, HCST, SRGN, IL7R, CD48, SPOCK2, GZMM
## Negative: WFDC2, SLPI, TFF3, FTL, DEFB1, SCGB2A1, HBA1, HBA2, AGR2, HBB
## KRT18, PIGR, CAPS, FXYD3, SCGB1D2, ASRGL1, C19orf33, HLA-DRA, SMIM22, KRT8
## AIF1, LYZ, C1QA, FTH1, CD68, CD24, CRIP2, C1QB, PRR15, SOX17
## PC_ 4
## Positive: SFRP2, S100A4, CCDC80, IGFBP6, FBLN2, GPNMB, DPT, FBLN1, CHRDL1, MFAP4
## ADH1B, CFD, C16orf89, CPE, PCOLCE2, MGST1, SRPX, PDGFRL, CST3, LUM
## PI16, MFAP5, CTHRC1, C1QTNF3, PRRX1, SCARA5, SLIT3, GSN, LMO3, CYBA
## Negative: JUNB, ZFP36, FOSB, SERPINE1, NR4A1, CLDN5, PTPRB, VWF, ADGRL4, LIFR
## C11orf96, MCL1, AQP1, EPAS1, CAV1, EMCN, LMNA, SEMA6A, EMP1, TSC22D1
## CDH5, SOCS3, MIR222HG, CAVIN2, TM4SF1, COL4A1, CLEC14A, ADGRF5, EGFL7, MYC
## PC_ 5
## Positive: PTPRB, ADGRL4, CLDN5, AQP1, CDH5, EMCN, VWF, CLEC14A, ADGRF5, EGFL7
## PECAM1, ROBO4, A2M, TM4SF18, CAVIN2, ECSCR, RAMP2, CXorf36, ARHGAP29, CALCRL
## MMRN2, CYYR1, TXNIP, PODXL, RAPGEF5, TM4SF1, FLT1, EPAS1, ESAM, S1PR1
## Negative: FOSB, SERPINE1, JUNB, EGR1, ZFP36, IER3, CEBPB, NR4A1, PPP1R15A, HBA2
## C11orf96, HBB, HBA1, CCL2, FOS, ERRFI1, IER2, ATF3, KRT19, SOD2
## PTGDS, SLPI, WFDC2, KDM6B, SOCS3, EGR3, TNFRSF12A, KLF10, GEM, CXCL2
VizDimLoadings(seurat, dims = 1:2, reduction = "pca")
DimPlot(seurat, reduction = "pca", group.by = "condition")
ElbowPlot(seurat)
seurat <- FindNeighbors(seurat, dims = 1:10, reduction = "pca")
## Computing nearest neighbor graph
## Computing SNN
seurat<- FindClusters(seurat, resolution = 0.1, cluster.name = "unintegrated_clusters")
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6303
## Number of edges: 203479
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9691
## Number of communities: 9
## Elapsed time: 0 seconds
seurat <- RunUMAP(seurat, dims = 1:10, reduction = "pca", reduction.name = "umap.unintegrated")
## 21:44:26 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:44:26 Read 6303 rows and found 10 numeric columns
## 21:44:26 Using Annoy for neighbor search, n_neighbors = 30
## 21:44:26 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:44:27 Writing NN index file to temp file /var/folders/hv/xklkw9n15xgbktqz58g7gtbw0000gn/T//RtmpFgmubG/filed878290cbb04
## 21:44:27 Searching Annoy index using 1 thread, search_k = 3000
## 21:44:29 Annoy recall = 100%
## 21:44:30 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 21:44:32 Initializing from normalized Laplacian + noise (using RSpectra)
## 21:44:32 Commencing optimization for 500 epochs, with 251694 positive edges
## 21:44:45 Optimization finished
DimPlot(
seurat,
reduction = "umap.unintegrated",
group.by = "condition",
combine = FALSE, label.size = 2
)
## [[1]]
DimPlot(seurat, reduction = "umap.unintegrated")
# visualize by batch and cell type annotation
# cell type annotations were previously added by Azimuth
DimPlot(seurat, reduction = "umap.unintegrated", group.by = "predicted.celltype.l2")
#this will take a long time
seurat <- IntegrateLayers(object = seurat, method = CCAIntegration, new.reduction = "integrated.cca", verbose = F)
seurat <- FindNeighbors(seurat, reduction = "integrated.cca", dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
seurat <- FindClusters(seurat, resolution = 0.1, cluster.name = "integrated_clusters")
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6303
## Number of edges: 212648
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9604
## Number of communities: 6
## Elapsed time: 0 seconds
seurat <- RunUMAP(seurat, reduction = "integrated.cca", dims = 1:10, reduction.name = "umap.cca")
## 21:46:23 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:46:23 Read 6303 rows and found 10 numeric columns
## 21:46:23 Using Annoy for neighbor search, n_neighbors = 30
## 21:46:23 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:46:24 Writing NN index file to temp file /var/folders/hv/xklkw9n15xgbktqz58g7gtbw0000gn/T//RtmpFgmubG/filed87854af8917
## 21:46:24 Searching Annoy index using 1 thread, search_k = 3000
## 21:46:27 Annoy recall = 100%
## 21:46:28 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 21:46:30 Initializing from normalized Laplacian + noise (using RSpectra)
## 21:46:30 Commencing optimization for 500 epochs, with 257932 positive edges
## 21:46:43 Optimization finished
DimPlot(
seurat,
reduction = "umap.cca",
group.by = "condition",
combine = FALSE, label.size = 2
)
## [[1]]
DimPlot(
seurat,
reduction = "umap.cca",
group.by = "predicted.celltype.l2",
combine = FALSE, label.size = 2
)
## [[1]]
DimPlot(seurat, reduction = "umap.cca")
# re-join layers after integration
seurat[["RNA"]] <- JoinLayers(seurat[["RNA"]])
seurat
## An object of class Seurat
## 33633 features across 6303 samples within 4 assays
## Active assay: RNA (33538 features, 2000 variable features)
## 3 layers present: data, counts, scale.data
## 3 other assays present: prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
## 6 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap.unintegrated, integrated.cca, umap.cca
markers0 <- FindMarkers(
object = seurat,
ident.1 = "0",
ident.2 = NULL,
only.pos = TRUE
)
markers1 <- FindMarkers(
object = seurat,
ident.1 = "1",
ident.2 = NULL,
only.pos = TRUE
)
markers2 <- FindMarkers(
object = seurat,
ident.1 = "2",
ident.2 = NULL,
only.pos = TRUE
)
markers3 <- FindMarkers(
object = seurat,
ident.1 = "3",
ident.2 = NULL,
only.pos = TRUE
)
markers4 <- FindMarkers(
object = seurat,
ident.1 = "4",
ident.2 = NULL,
only.pos = TRUE
)
markers5 <- FindMarkers(
object = seurat,
ident.1 = "5",
ident.2 = "0",
only.pos = TRUE
)
seurat$cond_celltype <- paste(seurat$condition, seurat$integrated_clusters, sep = "_")
Idents(seurat) <- "cond_celltype"
table(seurat$cond_celltype)
##
## Endometriosis_0 Endometriosis_1 Endometriosis_2
## 1051 1205 399
## Endometriosis_3 Endometriosis_4 Endometriosis_5
## 88 98 20
## Eutopic_Endometrium_0 Eutopic_Endometrium_1 Eutopic_Endometrium_2
## 1367 1053 300
## Eutopic_Endometrium_3 Eutopic_Endometrium_4 Eutopic_Endometrium_5
## 371 301 50
cond_celltype_table <- table(seurat$cond_celltype)
cond_celltype_df <- as.data.frame(cond_celltype_table)
colnames(cond_celltype_df) <- c("Cluster", "Count")
cond_celltype_df <- cond_celltype_df %>%
mutate(Condition = ifelse(grepl("Endometriosis", Cluster, ignore.case = TRUE),
"Endometriosis",
"Eutopic_Endometrium"))
cond_celltype_df$Cluster <- sub("^[^0-9]+", "", cond_celltype_df$Cluster)
print(cond_celltype_df)
## Cluster Count Condition
## 1 0 1051 Endometriosis
## 2 1 1205 Endometriosis
## 3 2 399 Endometriosis
## 4 3 88 Endometriosis
## 5 4 98 Endometriosis
## 6 5 20 Endometriosis
## 7 0 1367 Eutopic_Endometrium
## 8 1 1053 Eutopic_Endometrium
## 9 2 300 Eutopic_Endometrium
## 10 3 371 Eutopic_Endometrium
## 11 4 301 Eutopic_Endometrium
## 12 5 50 Eutopic_Endometrium
ggplot(cond_celltype_df, aes(x = Cluster, y = Count, fill = Condition)) +
geom_bar(stat = "identity") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(title = "Cells Present in Each Cluster", x = "Cluster", y = "Count")
perts <- cond_celltype_df %>%
group_by(Condition) %>%
mutate(Total = sum(Count), Percentage = (Count / Total) * 100) %>%
select(Cluster, Count, Condition, Percentage)
head(perts, 12)
## # A tibble: 12 × 4
## # Groups: Condition [2]
## Cluster Count Condition Percentage
## <chr> <int> <chr> <dbl>
## 1 0 1051 Endometriosis 36.7
## 2 1 1205 Endometriosis 42.1
## 3 2 399 Endometriosis 13.9
## 4 3 88 Endometriosis 3.08
## 5 4 98 Endometriosis 3.43
## 6 5 20 Endometriosis 0.699
## 7 0 1367 Eutopic_Endometrium 39.7
## 8 1 1053 Eutopic_Endometrium 30.6
## 9 2 300 Eutopic_Endometrium 8.72
## 10 3 371 Eutopic_Endometrium 10.8
## 11 4 301 Eutopic_Endometrium 8.74
## 12 5 50 Eutopic_Endometrium 1.45
cond_markers0 <- FindMarkers(seurat, ident.1 = "Endometriosis_0", ident.2 = "Eutopic_Endometrium_0", verbose = FALSE)
cond_markers1 <- FindMarkers(seurat, ident.1 = "Endometriosis_1", ident.2 = "Eutopic_Endometrium_1", verbose = FALSE)
cond_markers2 <- FindMarkers(seurat, ident.1 = "Endometriosis_2", ident.2 = "Eutopic_Endometrium_2", verbose = FALSE)
cond_markers3 <- FindMarkers(seurat, ident.1 = "Endometriosis_3", ident.2 = "Eutopic_Endometrium_3", verbose = FALSE)
cond_markers4 <- FindMarkers(seurat, ident.1 = "Endometriosis_4", ident.2 = "Eutopic_Endometrium_4", verbose = FALSE)
cond_markers5 <- FindMarkers(seurat, ident.1 = "Endometriosis_5", ident.2 = "Eutopic_Endometrium_5", verbose = FALSE)
library("knitr")
top_and_bot <- function(ldf, bot = TRUE) {
ldf <- ldf[order(ldf$avg_log2FC, decreasing = TRUE), ]
if (bot) {
top_3 <- head(ldf, 3)
result <- top_3
}
top_5 <- head(ldf, 5)
result <- top_5
if (bot) {
bottom_3 <- tail(ldf, 3)
result <- rbind(result, bottom_3)
}
print(knitr::kable(result, format = "simple"))
return(result)
}
top_and_bot(markers0, bot = FALSE)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ----------- ------ ----------- ------ ------ ----------
## CILP2 0 9.394701 0.036 0.000 0
## FREM1 0 9.047897 0.043 0.000 0
## AC007920.2 0 8.773420 0.020 0.000 0
## MEG8 0 8.688816 0.108 0.001 0
## COL6A6 0 8.428849 0.020 0.000 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## CILP2 4.907295e-33 9.394701 0.036 0.000 1.645808e-28
## FREM1 1.832568e-38 9.047897 0.043 0.000 6.146067e-34
## AC007920.2 1.200183e-18 8.773420 0.020 0.000 4.025173e-14
## MEG8 7.029721e-96 8.688816 0.108 0.001 2.357628e-91
## COL6A6 5.255316e-19 8.428849 0.020 0.000 1.762528e-14
#genes from find marker genes cluster 0 B- cells IGLC2, IGKC, IGHG1 ???
FeaturePlot(seurat, features = c("CILP2","FREM1"), reduction = "umap.cca", split.by = "condition")
top_and_bot(cond_markers0)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ---------- ------ ----------- ------ ------ ----------
## PROK1 0 10.448529 0.157 0.000 0e+00
## SPRR2F 0 10.113882 0.102 0.000 0e+00
## GSTA1 0 9.768453 0.037 0.000 0e+00
## GATA4 0 8.569486 0.070 0.000 0e+00
## LINC00473 0 8.270741 0.059 0.000 0e+00
## SCGB2A1 0 -9.261560 0.000 0.045 1e-07
## MUC5B 0 -9.351723 0.000 0.048 0e+00
## SCGB3A1 0 -11.124677 0.000 0.143 0e+00
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## PROK1 6.437929e-52 10.448529 0.157 0.000 2.159153e-47
## SPRR2F 1.709378e-33 10.113882 0.102 0.000 5.732913e-29
## GSTA1 7.052018e-13 9.768453 0.037 0.000 2.365106e-08
## GATA4 2.267648e-23 8.569486 0.070 0.000 7.605238e-19
## LINC00473 9.508400e-20 8.270741 0.059 0.000 3.188927e-15
## SCGB2A1 4.089799e-12 -9.261560 0.000 0.045 1.371637e-07
## MUC5B 5.192497e-13 -9.351723 0.000 0.048 1.741460e-08
## SCGB3A1 3.025745e-37 -11.124677 0.000 0.143 1.014775e-32
#proliferative phase marker genes
FeaturePlot(seurat, features = c("PGR", "ESR1"), reduction = "umap.cca",split.by = "condition")
#proliferative phase marker genes
FeaturePlot(seurat, features = c("IGF1", "MMP11"), reduction = "umap.cca",split.by = "condition")
#proliferative phase marker genes
FeaturePlot(seurat, features = c("CRABP2", "ECM1"), reduction = "umap.cca", split.by = "condition")
top_and_bot(markers1, bot = FALSE)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ---------- ------ ----------- ------ ------ ----------
## SIRPG 0 10.210382 0.089 0 0
## LINC02273 0 9.847718 0.068 0 0
## TRBV28 0 9.731967 0.034 0 0
## UBASH3A 0 9.634712 0.061 0 0
## C15orf53 0 9.378756 0.047 0 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## SIRPG 2.032893e-82 10.210382 0.089 0 6.817918e-78
## LINC02273 5.020953e-63 9.847719 0.068 0 1.683927e-58
## TRBV28 8.144850e-32 9.731967 0.034 0 2.731620e-27
## UBASH3A 1.742459e-56 9.634712 0.061 0 5.843860e-52
## C15orf53 2.655284e-44 9.378756 0.047 0 8.905293e-40
#genes from find marker genes cluster 1 T- cell CD3D, TRAC, CD3E, TRBC2
FeaturePlot(seurat, features = c("CD3D","TRAC"), reduction = "umap.cca", split.by = "condition")
# PAEP and SPP1 (secretory phase genes) are up in endometriosis compared to Eutopic t-cells
# also up is COLGALT2 wich is overexressed in ovarian cancer
top_and_bot(cond_markers1)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## --------- ---------- ----------- ------ ------ ----------
## IGLC3 0.0000000 11.316918 0.045 0.000 0.0000001
## SPP1 0.0000000 8.158166 0.060 0.000 0.0000000
## RHEX 0.0002818 7.491035 0.012 0.000 1.0000000
## COLGALT2 0.0000169 6.296506 0.017 0.000 0.5659832
## CLDN11 0.0007269 6.256431 0.011 0.000 1.0000000
## WFDC2 0.0000000 -8.493679 0.002 0.249 0.0000000
## SCGB3A1 0.0000000 -8.528897 0.001 0.203 0.0000000
## SLPI 0.0000000 -9.403687 0.002 0.417 0.0000000
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## IGLC3 3.635213e-12 11.316918 0.045 0.000 1.219178e-07
## SPP1 7.733389e-16 8.158165 0.060 0.000 2.593624e-11
## RHEX 2.817724e-04 7.491035 0.012 0.000 1.000000e+00
## COLGALT2 1.687588e-05 6.296506 0.017 0.000 5.659832e-01
## CLDN11 7.269314e-04 6.256431 0.011 0.000 1.000000e+00
## WFDC2 5.349628e-74 -8.493679 0.002 0.249 1.794158e-69
## SCGB3A1 6.406948e-60 -8.528897 0.001 0.203 2.148762e-55
## SLPI 4.123295e-134 -9.403687 0.002 0.417 1.382871e-129
FeaturePlot(seurat, features = c("GATA4","PROK1"), reduction = "umap.cca", split.by = "condition")
## Warning: All cells have the same value (0) of "GATA4"
## Warning: All cells have the same value (0) of "PROK1"
# up in endo is genes associated with fetal growth PROK1 and GATA4, and also Hylaruon synthase HAS1
# down in endo is cell growth inhibitor SCGB3A1, IGHM and MUC5B
top_and_bot(markers2, bot = FALSE)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## -------- ------ ----------- ------ ------ ----------
## MCEMP1 0 11.27867 0.094 0 0
## LILRA5 0 10.42753 0.070 0 0
## ACOD1 0 10.19312 0.050 0 0
## CD300LB 0 10.16492 0.057 0 0
## CD300E 0 10.10868 0.120 0 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## MCEMP1 2.784776e-118 11.27867 0.094 0 9.339581e-114
## LILRA5 4.419314e-88 10.42753 0.070 0 1.482149e-83
## ACOD1 2.595595e-63 10.19312 0.050 0 8.705108e-59
## CD300LB 3.783834e-72 10.16492 0.057 0 1.269022e-67
## CD300E 2.646639e-148 10.10868 0.120 0 8.876297e-144
#genes from find marker genes cluster 2 macrophage
FeaturePlot(seurat, features = c("CD14","CD68"), reduction = "umap.cca", split.by = "condition")
top_and_bot(cond_markers2)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ----------- ---------- ----------- ------ ------ ----------
## CHIT1 0.0000000 11.593705 0.140 0.000 0.0000005
## CCL18 0.0000002 9.920340 0.085 0.000 0.0075477
## CHI3L1 0.0000000 9.900358 0.098 0.000 0.0008778
## CCL13 0.0037831 8.704779 0.028 0.000 1.0000000
## AC015660.2 0.0000103 8.523929 0.063 0.000 0.3458789
## MUC5B 0.0000568 -7.290253 0.000 0.040 1.0000000
## SCGB3A1 0.0000000 -8.876590 0.000 0.087 0.0000718
## HBG2 0.0456419 -9.463383 0.000 0.010 1.0000000
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## CHIT1 1.453863e-11 11.593705 0.140 0.000 4.875965e-07
## CCL18 2.250497e-07 9.920340 0.085 0.000 7.547718e-03
## CHI3L1 2.617369e-08 9.900358 0.098 0.000 8.778132e-04
## CCL13 3.783077e-03 8.704779 0.028 0.000 1.000000e+00
## AC015660.2 1.031305e-05 8.523929 0.063 0.000 3.458789e-01
## MUC5B 5.677898e-05 -7.290253 0.000 0.040 1.000000e+00
## SCGB3A1 2.141507e-09 -8.876590 0.000 0.087 7.182186e-05
## HBG2 4.564194e-02 -9.463383 0.000 0.010 1.000000e+00
#no large diff
FeaturePlot(seurat, features = c("CCL18","HBG2"), reduction = "umap.cca", split.by = "condition")
top_and_bot(markers3, bot = FALSE)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ---------- ------ ----------- ------ ------ ----------
## LINC01320 0 12.12859 0.081 0 0
## WDR38 0 11.34371 0.037 0 0
## TUBA4B 0 10.71207 0.028 0 0
## C9orf152 0 10.52318 0.041 0 0
## ANKRD66 0 10.48615 0.022 0 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## LINC01320 4.817969e-105 12.12859 0.081 0 1.615850e-100
## WDR38 4.100697e-49 11.34371 0.037 0 1.375292e-44
## TUBA4B 6.033155e-38 10.71207 0.028 0 2.023400e-33
## C9orf152 1.068082e-54 10.52318 0.041 0 3.582133e-50
## ANKRD66 1.445551e-29 10.48615 0.022 0 4.848091e-25
#genes from find marker genes cluster 3
FeaturePlot(seurat, features = c("LINC01320","WDR38"), reduction = "umap.cca", split.by = "condition")
top_and_bot(cond_markers2)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ----------- ---------- ----------- ------ ------ ----------
## CHIT1 0.0000000 11.593705 0.140 0.000 0.0000005
## CCL18 0.0000002 9.920340 0.085 0.000 0.0075477
## CHI3L1 0.0000000 9.900358 0.098 0.000 0.0008778
## CCL13 0.0037831 8.704779 0.028 0.000 1.0000000
## AC015660.2 0.0000103 8.523929 0.063 0.000 0.3458789
## MUC5B 0.0000568 -7.290253 0.000 0.040 1.0000000
## SCGB3A1 0.0000000 -8.876590 0.000 0.087 0.0000718
## HBG2 0.0456419 -9.463383 0.000 0.010 1.0000000
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## CHIT1 1.453863e-11 11.593705 0.140 0.000 4.875965e-07
## CCL18 2.250497e-07 9.920340 0.085 0.000 7.547718e-03
## CHI3L1 2.617369e-08 9.900358 0.098 0.000 8.778132e-04
## CCL13 3.783077e-03 8.704779 0.028 0.000 1.000000e+00
## AC015660.2 1.031305e-05 8.523929 0.063 0.000 3.458789e-01
## MUC5B 5.677898e-05 -7.290253 0.000 0.040 1.000000e+00
## SCGB3A1 2.141507e-09 -8.876590 0.000 0.087 7.182186e-05
## HBG2 4.564194e-02 -9.463383 0.000 0.010 1.000000e+00
#big diff
FeaturePlot(seurat, features = c("MUC5B","PAEP"), reduction = "umap.cca", split.by = "condition")
## Warning: All cells have the same value (0) of "MUC5B"
#big diff
FeaturePlot(seurat, features = c("SCGB3A1","TFF3"), reduction = "umap.cca", split.by = "condition")
#secretory phase marker genes
FeaturePlot(seurat, features = c("PAEP","FOXO1"), reduction = "umap.cca", split.by = "condition")
top_and_bot(markers4, bot = FALSE)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ---------- ------ ----------- ------ ------ ----------
## GPIHBP1 0 9.468329 0.100 0 0
## HID1-AS1 0 9.123262 0.013 0 0
## LINC00636 0 9.064349 0.020 0 0
## FOXC2 0 8.931556 0.013 0 0
## CAPN11 0 8.913801 0.015 0 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## GPIHBP1 1.256795e-124 9.468329 0.100 0 4.215038e-120
## HID1-AS1 7.705891e-18 9.123262 0.013 0 2.584402e-13
## LINC00636 1.345849e-27 9.064349 0.020 0 4.513708e-23
## FOXC2 7.705891e-18 8.931556 0.013 0 2.584402e-13
## CAPN11 4.264719e-21 8.913801 0.015 0 1.430301e-16
#cluster 4
FeaturePlot(seurat, features = c("GPIHBP1","HID1-AS1"), reduction = "umap.cca", split.by = "condition")
top_and_bot(markers4)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ---------- ---------- ----------- ------ ------ ----------
## GPIHBP1 0.0000000 9.4683292 0.100 0.000 0
## HID1-AS1 0.0000000 9.1232622 0.013 0.000 0
## LINC00636 0.0000000 9.0643493 0.020 0.000 0
## FOXC2 0.0000000 8.9315560 0.013 0.000 0
## CAPN11 0.0000000 8.9138007 0.015 0.000 0
## MIDN 0.0317405 0.1001424 0.105 0.152 1
## RPL18A 0.3427599 0.1000908 0.952 0.949 1
## DESI1 0.0262904 0.1000488 0.033 0.061 1
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## GPIHBP1 1.256795e-124 9.4683292 0.100 0.000 4.215038e-120
## HID1-AS1 7.705891e-18 9.1232622 0.013 0.000 2.584402e-13
## LINC00636 1.345849e-27 9.0643493 0.020 0.000 4.513708e-23
## FOXC2 7.705891e-18 8.9315560 0.013 0.000 2.584402e-13
## CAPN11 4.264719e-21 8.9138007 0.015 0.000 1.430301e-16
## MIDN 3.174049e-02 0.1001424 0.105 0.152 1.000000e+00
## RPL18A 3.427599e-01 0.1000908 0.952 0.949 1.000000e+00
## DESI1 2.629036e-02 0.1000488 0.033 0.061 1.000000e+00
#cluster 4
FeaturePlot(seurat, features = c("HMOX1","PLA2G2A"), reduction = "umap.cca", split.by = "condition")
top_and_bot(markers5, bot = FALSE)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ------- ------ ----------- ------ ------ ----------
## SCN3A 0 10.729229 0.143 0 0
## HRH2 0 10.433841 0.157 0 0
## OR51E2 0 10.077810 0.100 0 0
## DGKG 0 10.073699 0.100 0 0
## ANO3 0 9.769803 0.071 0 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## SCN3A 2.270273e-77 10.729229 0.143 0 7.614042e-73
## HRH2 5.942715e-85 10.433842 0.157 0 1.993068e-80
## OR51E2 1.205328e-54 10.077810 0.100 0 4.042427e-50
## DGKG 1.205328e-54 10.073699 0.100 0 4.042427e-50
## ANO3 1.666858e-39 9.769803 0.071 0 5.590308e-35
#cluster 5
FeaturePlot(seurat, features = c("SCN3A","HRH2"), reduction = "umap.cca", split.by = "condition")
## Warning: All cells have the same value (0) of "SCN3A"
top_and_bot(cond_markers5)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## -------- ---------- ----------- ------ ------ ----------
## C7 0.0000005 7.893226 0.45 0.00 0.0175086
## MEG3 0.0000626 7.509323 0.30 0.00 1.0000000
## LAMB1 0.0000005 7.340226 0.45 0.00 0.0175086
## IER3 0.0000132 6.775525 0.35 0.00 0.4410753
## COLEC11 0.0000132 6.526857 0.35 0.00 0.4410753
## HBB 0.0314106 -5.997063 0.20 0.44 1.0000000
## HBA2 0.0051733 -6.064125 0.15 0.48 1.0000000
## RERGL 0.0000020 -8.381696 0.00 0.68 0.0686126
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## C7 5.220539e-07 7.893226 0.45 0.00 0.01750864
## MEG3 6.259361e-05 7.509323 0.30 0.00 1.00000000
## LAMB1 5.220539e-07 7.340226 0.45 0.00 0.01750864
## IER3 1.315151e-05 6.775525 0.35 0.00 0.44107526
## COLEC11 1.315151e-05 6.526857 0.35 0.00 0.44107526
## HBB 3.141060e-02 -5.997063 0.20 0.44 1.00000000
## HBA2 5.173346e-03 -6.064124 0.15 0.48 1.00000000
## RERGL 2.045818e-06 -8.381696 0.00 0.68 0.06861264
#cluster 5
FeaturePlot(seurat, features = c("C7","RERGL"), reduction = "umap.cca", split.by = "condition")