load in 2 samples of each condition so that this code can run on my computer
data_dir ="/Users/rorihoover/Desktop/BME494HW/Eutopic/sample30/outs/filtered_feature_bc_matrix/"
data <- Read10X(data.dir = data_dir)
Eutopic_seurat = CreateSeuratObject(counts = data)
data_dir ="/Users/rorihoover/Desktop/BME494HW/Eutopic/sample31/outs/filtered_feature_bc_matrix/"
data <- Read10X(data.dir = data_dir)
seurat_object = CreateSeuratObject(counts = data)
Eutopic_seurat<- merge(Eutopic_seurat, y = seurat_object)
## Warning: Some cell names are duplicated across objects provided. Renaming to
## enforce unique cell names.
Eutopic_seurat<- merge(Eutopic_seurat, y = seurat_object)
rm(seurat_object)
rm(data)
#saveRDS(Eutopic_seurat, file = "Eutopic_seurat.rds")
Length<- length(Cells(Eutopic_seurat))
condition <- list(rep("Eutopic_Endometrium", Length))
meta_data <- data.frame(condition)
rownames(meta_data)<- Cells(Eutopic_seurat)
Eutopic_seurat<- AddMetaData(
object = Eutopic_seurat,
metadata = meta_data,
col.name = 'condition'
)
head(x = Eutopic_seurat[[]])
## orig.ident nCount_RNA nFeature_RNA condition
## AAACCCAAGACCTTTG-1_1 SeuratProject 1057 523 Eutopic_Endometrium
## AAACCCAAGACTCTTG-1_1 SeuratProject 4052 1516 Eutopic_Endometrium
## AAACCCAAGAGAATCT-1_1 SeuratProject 4135 1424 Eutopic_Endometrium
## AAACCCAAGATGCAGC-1_1 SeuratProject 853 431 Eutopic_Endometrium
## AAACCCAAGATTCGCT-1_1 SeuratProject 5051 1823 Eutopic_Endometrium
## AAACCCAAGGTTGAGC-1_1 SeuratProject 750 422 Eutopic_Endometrium
data_dir ="/Users/rorihoover/Desktop/BME494HW/endo/sample7/outs/filtered_feature_bc_matrix/"
data <- Read10X(data.dir = data_dir)
Endo_seurat = CreateSeuratObject(counts = data)
data_dir ="/Users/rorihoover/Desktop/BME494HW/endo/sample10/outs/filtered_feature_bc_matrix/"
data <- Read10X(data.dir = data_dir)
seurat_object = CreateSeuratObject(counts = data)
Endo_seurat<- merge(Endo_seurat, y = seurat_object)
## Warning: Some cell names are duplicated across objects provided. Renaming to
## enforce unique cell names.
rm(seurat_object)
rm(data)
#saveRDS(Endo_seurat, file = "Endo_seurat.rds")
Length<- length(Cells(Endo_seurat))
condition <- list(rep("Endometriosis", Length))
meta_data <- data.frame(condition)
rownames(meta_data)<- Cells(Endo_seurat)
Endo_seurat<- AddMetaData(
object = Endo_seurat,
metadata = meta_data,
col.name = 'condition'
)
rm(meta_data)
rm(Length)
rm(condition)
seurat<- merge(Endo_seurat, y = Eutopic_seurat)
## Warning: Some cell names are duplicated across objects provided. Renaming to
## enforce unique cell names.
rm(Endo_seurat)
rm(Eutopic_seurat)
head(x = seurat[[]])
## orig.ident nCount_RNA nFeature_RNA condition
## AAACCCAAGCAAATCA-1_1_1 SeuratProject 826 454 Endometriosis
## AAACCCAAGGCTGAAC-1_1_1 SeuratProject 10295 3064 Endometriosis
## AAACCCAAGTCGCGAA-1_1_1 SeuratProject 4798 1840 Endometriosis
## AAACCCACAAGGTCGA-1_1_1 SeuratProject 2669 1410 Endometriosis
## AAACCCACAATCCTTT-1_1_1 SeuratProject 582 241 Endometriosis
## AAACCCACACGAGAAC-1_1_1 SeuratProject 7856 2544 Endometriosis
seurat[["percent.mt"]] <- PercentageFeatureSet(seurat, pattern = "^MT-")
VlnPlot(seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "percent.mt")+
geom_hline(yintercept = 5, linetype = "dashed", color = "black")
FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")+
geom_hline(yintercept = 2500, linetype = "dashed", color = "black")+
geom_hline(yintercept = 200, linetype = "dashed", color = "black")
seurat <- subset(seurat, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
seurat[["percent.mt"]] <- PercentageFeatureSet(seurat, pattern = "^MT-")
VlnPlot(seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
#strange error
#seurat <- RunAzimuth(seurat, reference = "pbmcref")
#seurat
seurat <- NormalizeData(seurat)
## Normalizing layer: counts.1.1
## Normalizing layer: counts.1.1.2
## Normalizing layer: counts.2.1
## Normalizing layer: counts.2.2
## Normalizing layer: counts.2.1.2
seurat <- FindVariableFeatures(seurat)
## Finding variable features for layer counts.1.1
## Finding variable features for layer counts.1.1.2
## Finding variable features for layer counts.2.1
## Finding variable features for layer counts.2.2
## Finding variable features for layer counts.2.1.2
seurat <- ScaleData(seurat)
## Centering and scaling data matrix
seurat <- RunPCA(seurat)
## PC_ 1
## Positive: DCN, COL1A1, LUM, COL3A1, IGFBP7, COL6A2, SPARCL1, FBLN1, COL1A2, SPARC
## C1R, COL6A1, IGFBP4, MMP2, CALD1, C1S, SERPINF1, LGALS1, PCOLCE, TIMP3
## COL6A3, SERPING1, SELENOP, TIMP2, GSN, IGFBP6, CST3, AEBP1, COL14A1, MGP
## Negative: SRGN, CXCR4, CD52, CCL5, CD48, CORO1A, LAPTM5, RAC2, CST7, IL32
## CD2, KLRB1, GZMA, TRBC2, CD7, CD3D, TRAC, CD69, GZMK, RUNX3
## STK17B, FYB1, IL7R, NKG7, TSC22D3, SAMSN1, BTG1, ISG20, LTB, TRBC1
## PC_ 2
## Positive: C11orf96, IGFBP1, RBP1, PAEP, APOE, RND3, IGFBP2, RAMP1, FOSB, TIMP3
## IGFBP3, NFKBIA, GNLY, ARC, IER3, TGM2, ATF3, GADD45G, IER2, TXN
## PPP1R15A, LEFTY2, VCAN, EGR1, RRAD, GADD45A, JUNB, SAT1, CXCL8, SERTAD1
## Negative: CCDC80, SFRP2, MGP, C7, SFRP1, S100A10, COL1A2, SFRP4, DPT, COL14A1
## GSN, CPE, MRC2, OGN, S100A4, IGFBP5, FGL2, PI16, CPVL, CFH
## C3, PLAC9, COLEC12, PLXDC2, MMP23B, PRELP, SVEP1, BOC, TNXB, PLTP
## PC_ 3
## Positive: LYZ, MS4A6A, AIF1, HLA-DRA, CYBB, HLA-DRB1, MS4A7, HLA-DQA1, FCGR2A, FCN1
## HLA-DPA1, HLA-DMA, LST1, MNDA, HLA-DPB1, CLEC7A, HLA-DQB1, SPI1, CTSS, RNASE6
## CD68, TYROBP, CSF1R, IFI30, C5AR1, IL1B, CD86, NCF2, CD74, HLA-DMB
## Negative: TRAC, IL32, CD3D, MGP, CD2, TRBC2, CCL5, SFRP4, CCDC80, TRBC1
## SFRP2, SFRP1, IL7R, GZMA, HBB, GZMK, HBA2, LTB, KLRB1, PLAC9
## CD8A, IKZF3, HBA1, PTGDS, CD7, CD8B, ADIRF, DPT, ITM2A, CLEC2D
## PC_ 4
## Positive: MALAT1, MT-CO1, CREM, NEAT1, MCL1, XCL2, CEMIP2, PIK3R1, HSP90AA1, ZFP36
## MYADM, XCL1, KLRD1, TSC22D3, FAM177A1, YPEL5, HSPH1, REL, SYTL3, HSPD1
## BTG1, CD7, WSB1, SRGN, NKG7, CXCR4, CRIP1, CST7, CHORDC1, CCL5
## Negative: HLA-DRA, AIF1, LYZ, FTL, S100A6, MS4A6A, HLA-DQA1, HLA-DPB1, HLA-DPA1, HLA-DQB1
## MNDA, HBB, CD74, LST1, FTH1, FCN1, SPI1, CYBB, TMSB10, CLEC10A
## S100A9, HLA-DRB1, HBA2, HLA-DMB, HLA-DMA, MPEG1, FCER1A, SERPINA1, LGALS2, LGALS1
## PC_ 5
## Positive: CAVIN2, CLDN5, ECSCR, EGFL7, EMCN, MMRN1, TM4SF18, CCL21, RAMP2, TM4SF1
## TIE1, ESAM, CDH5, PROX1, TINAGL1, ARHGAP29, A2M, SMAD1, TFF3, PTPRB
## VWF, GNG11, ROBO4, AQP1, PECAM1, PCAT19, FABP4, CRIP2, ADGRL4, BCAM
## Negative: DCN, FBLN1, LUM, COL1A1, LGALS1, OGN, MMP2, SFRP2, C3, VCAN
## SPON2, COL3A1, CFD, SERPINF1, C1R, IGFBP6, SFRP1, PCOLCE, OLFML3, S100A4
## PTGDS, PI16, ISLR, MRC2, PDGFRL, CEBPB, C1S, GPNMB, CCDC80, S100A6
VizDimLoadings(seurat, dims = 1:2, reduction = "pca")
DimPlot(seurat, reduction = "pca", group.by = "condition")
ElbowPlot(seurat)
seurat <- FindNeighbors(seurat, dims = 1:10, reduction = "pca")
## Computing nearest neighbor graph
## Computing SNN
seurat<- FindClusters(seurat, resolution = 0.1, cluster.name = "unintegrated_clusters")
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10255
## Number of edges: 332360
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9734
## Number of communities: 9
## Elapsed time: 1 seconds
seurat <- RunUMAP(seurat, dims = 1:10, reduction = "pca", reduction.name = "umap.unintegrated")
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 23:36:07 UMAP embedding parameters a = 0.9922 b = 1.112
## 23:36:07 Read 10255 rows and found 10 numeric columns
## 23:36:07 Using Annoy for neighbor search, n_neighbors = 30
## 23:36:07 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 23:36:08 Writing NN index file to temp file /var/folders/hv/xklkw9n15xgbktqz58g7gtbw0000gn/T//Rtmp74Y4Pl/filede9c190742c0
## 23:36:08 Searching Annoy index using 1 thread, search_k = 3000
## 23:36:13 Annoy recall = 100%
## 23:36:14 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 23:36:15 Initializing from normalized Laplacian + noise (using RSpectra)
## 23:36:16 Commencing optimization for 200 epochs, with 410494 positive edges
## 23:36:24 Optimization finished
DimPlot(
seurat,
reduction = "umap.unintegrated",
group.by = "condition",
combine = FALSE, label.size = 2
)
## [[1]]
DimPlot(seurat, reduction = "umap.unintegrated")
# visualize by batch and cell type annotation
# cell type annotations were previously added by Azimuth
#DimPlot(seurat, reduction = "umap.unintegrated", group.by = "predicted.celltype.l2")
#this will take a long time
seurat <- IntegrateLayers(object = seurat, method = CCAIntegration, new.reduction = "integrated.cca", verbose = F)
seurat <- FindNeighbors(seurat, reduction = "integrated.cca", dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
seurat <- FindClusters(seurat, resolution = 0.1, cluster.name = "integrated_clusters")
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10255
## Number of edges: 342359
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9510
## Number of communities: 6
## Elapsed time: 1 seconds
seurat <- RunUMAP(seurat, reduction = "integrated.cca", dims = 1:10, reduction.name = "umap.cca")
## 23:39:43 UMAP embedding parameters a = 0.9922 b = 1.112
## 23:39:43 Read 10255 rows and found 10 numeric columns
## 23:39:43 Using Annoy for neighbor search, n_neighbors = 30
## 23:39:43 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 23:39:44 Writing NN index file to temp file /var/folders/hv/xklkw9n15xgbktqz58g7gtbw0000gn/T//Rtmp74Y4Pl/filede9c320ad4a3
## 23:39:44 Searching Annoy index using 1 thread, search_k = 3000
## 23:39:49 Annoy recall = 100%
## 23:39:50 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 23:39:52 Initializing from normalized Laplacian + noise (using RSpectra)
## 23:39:52 Commencing optimization for 200 epochs, with 432952 positive edges
## 23:40:01 Optimization finished
DimPlot(
seurat,
reduction = "umap.cca",
group.by = "condition",
combine = FALSE, label.size = 2
)
## [[1]]
# DimPlot(
# seurat,
# reduction = "umap.cca",
# group.by = "predicted.celltype.l2",
# combine = FALSE, label.size = 2
# )
DimPlot(seurat, reduction = "umap.cca")
# re-join layers after integration
seurat[["RNA"]] <- JoinLayers(seurat[["RNA"]])
seurat
## An object of class Seurat
## 38224 features across 10255 samples within 1 assay
## Active assay: RNA (38224 features, 2000 variable features)
## 3 layers present: data, counts, scale.data
## 4 dimensional reductions calculated: pca, umap.unintegrated, integrated.cca, umap.cca
markers0 <- FindMarkers(
object = seurat,
ident.1 = "0",
ident.2 = NULL,
only.pos = TRUE
)
markers1 <- FindMarkers(
object = seurat,
ident.1 = "1",
ident.2 = NULL,
only.pos = TRUE
)
markers2 <- FindMarkers(
object = seurat,
ident.1 = "2",
ident.2 = NULL,
only.pos = TRUE
)
markers3 <- FindMarkers(
object = seurat,
ident.1 = "3",
ident.2 = NULL,
only.pos = TRUE
)
markers4 <- FindMarkers(
object = seurat,
ident.1 = "4",
ident.2 = NULL,
only.pos = TRUE
)
markers5 <- FindMarkers(
object = seurat,
ident.1 = "5",
ident.2 = "0",
only.pos = TRUE
)
seurat$cond_celltype <- paste(seurat$condition, seurat$integrated_clusters, sep = "_")
Idents(seurat) <- "cond_celltype"
table(seurat$cond_celltype)
##
## Endometriosis_0 Endometriosis_1 Endometriosis_2
## 1993 2365 543
## Endometriosis_3 Endometriosis_4 Endometriosis_5
## 483 275 174
## Eutopic_Endometrium_0 Eutopic_Endometrium_1 Eutopic_Endometrium_2
## 2825 1065 178
## Eutopic_Endometrium_3 Eutopic_Endometrium_4 Eutopic_Endometrium_5
## 153 183 18
cond_celltype_table <- table(seurat$cond_celltype)
cond_celltype_df <- as.data.frame(cond_celltype_table)
colnames(cond_celltype_df) <- c("Cluster", "Count")
cond_celltype_df <- cond_celltype_df %>%
mutate(Condition = ifelse(grepl("Endometriosis", Cluster, ignore.case = TRUE),
"Endometriosis",
"Eutopic_Endometrium"))
cond_celltype_df$Cluster <- sub("^[^0-9]+", "", cond_celltype_df$Cluster)
print(cond_celltype_df)
## Cluster Count Condition
## 1 0 1993 Endometriosis
## 2 1 2365 Endometriosis
## 3 2 543 Endometriosis
## 4 3 483 Endometriosis
## 5 4 275 Endometriosis
## 6 5 174 Endometriosis
## 7 0 2825 Eutopic_Endometrium
## 8 1 1065 Eutopic_Endometrium
## 9 2 178 Eutopic_Endometrium
## 10 3 153 Eutopic_Endometrium
## 11 4 183 Eutopic_Endometrium
## 12 5 18 Eutopic_Endometrium
ggplot(cond_celltype_df, aes(x = Cluster, y = Count, fill = Condition)) +
geom_bar(stat = "identity") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(title = "Cells Present in Each Cluster", x = "Cluster", y = "Count")
perts <- cond_celltype_df %>%
group_by(Condition) %>%
mutate(Total = sum(Count), Percentage = (Count / Total) * 100) %>%
select(Cluster, Count, Condition, Percentage)
head(perts, 12)
## # A tibble: 12 × 4
## # Groups: Condition [2]
## Cluster Count Condition Percentage
## <chr> <int> <chr> <dbl>
## 1 0 1993 Endometriosis 34.2
## 2 1 2365 Endometriosis 40.5
## 3 2 543 Endometriosis 9.31
## 4 3 483 Endometriosis 8.28
## 5 4 275 Endometriosis 4.71
## 6 5 174 Endometriosis 2.98
## 7 0 2825 Eutopic_Endometrium 63.9
## 8 1 1065 Eutopic_Endometrium 24.1
## 9 2 178 Eutopic_Endometrium 4.03
## 10 3 153 Eutopic_Endometrium 3.46
## 11 4 183 Eutopic_Endometrium 4.14
## 12 5 18 Eutopic_Endometrium 0.407
library(ggplot2)
library(dplyr)
endometriosis_data <- filter(perts, Condition == "Endometriosis")
ggplot(endometriosis_data, aes(x = "", y = Percentage, fill = factor(Cluster))) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y") +
labs(title = "Endometriosis - Initial Cluster Distribution") +
theme_void() +
theme(legend.title = element_blank())
# Plot for Eutopic Endometrium
eutopic_data <- filter(perts, Condition == "Eutopic_Endometrium")
ggplot(eutopic_data, aes(x = "", y = Percentage, fill = factor(Cluster))) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y") +
labs(title = "Eutopic Endometrium - Initial Cluster Distribution") +
theme_void() +
theme(legend.title = element_blank())
cond_markers0 <- FindMarkers(seurat, ident.1 = "Endometriosis_0", ident.2 = "Eutopic_Endometrium_0", verbose = FALSE)
cond_markers1 <- FindMarkers(seurat, ident.1 = "Endometriosis_1", ident.2 = "Eutopic_Endometrium_1", verbose = FALSE)
cond_markers2 <- FindMarkers(seurat, ident.1 = "Endometriosis_2", ident.2 = "Eutopic_Endometrium_2", verbose = FALSE)
cond_markers3 <- FindMarkers(seurat, ident.1 = "Endometriosis_3", ident.2 = "Eutopic_Endometrium_3", verbose = FALSE)
cond_markers4 <- FindMarkers(seurat, ident.1 = "Endometriosis_4", ident.2 = "Eutopic_Endometrium_4", verbose = FALSE)
cond_markers5 <- FindMarkers(seurat, ident.1 = "Endometriosis_5", ident.2 = "Eutopic_Endometrium_5", verbose = FALSE)
library("knitr")
top_and_bot <- function(ldf, bot = TRUE) {
ldf <- ldf[order(ldf$avg_log2FC, decreasing = TRUE), ]
if (bot) {
top_3 <- head(ldf, 3)
result <- top_3
}
top_5 <- head(ldf, 5)
result <- top_5
if (bot) {
bottom_3 <- tail(ldf, 3)
result <- rbind(result, bottom_3)
}
print(knitr::kable(result, format = "simple"))
return(result)
}
top_and_bot(markers0, bot = FALSE)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ----------- ------ ----------- ------ ------ ----------
## PCSK1 0 8.694589 0.018 0.000 0
## C11orf53 0 8.134797 0.013 0.000 0
## AL117348.1 0 5.530758 0.017 0.000 0
## AL139393.3 0 5.187896 0.013 0.000 0
## AC005062.1 0 4.672949 0.021 0.001 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## PCSK1 7.902303e-24 8.694589 0.018 0.000 3.020576e-19
## C11orf53 2.737425e-17 8.134797 0.013 0.000 1.046353e-12
## AL117348.1 3.187419e-20 5.530758 0.017 0.000 1.218359e-15
## AL139393.3 3.296785e-16 5.187896 0.013 0.000 1.260163e-11
## AC005062.1 1.799464e-24 4.672949 0.021 0.001 6.878272e-20
#genes from find marker genes cluster 0
FeaturePlot(seurat, features = c("PCSK1","C11orf53"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))
## Warning: All cells have the same value (0) of "PCSK1"
top_and_bot(cond_markers0)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ------- ------ ----------- ------ ------ ----------
## IGHG1 0 13.13153 0.028 0.000 0
## WISP2 0 12.73632 0.130 0.000 0
## TPSAB1 0 12.10728 0.030 0.000 0
## SFRP2 0 10.98770 0.327 0.001 0
## IGLC3 0 10.89223 0.027 0.000 0
## CCN1 0 -11.86982 0.000 0.150 0
## SNHG29 0 -12.33631 0.000 0.218 0
## IGFBP1 0 -15.96739 0.000 0.781 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## IGHG1 3.233099e-19 13.13153 0.028 0.000 1.235820e-14
## WISP2 2.894087e-86 12.73632 0.130 0.000 1.106236e-81
## TPSAB1 1.713441e-20 12.10728 0.030 0.000 6.549459e-16
## SFRP2 8.563385e-231 10.98770 0.327 0.001 3.273268e-226
## IGLC3 1.403127e-18 10.89223 0.027 0.000 5.363313e-14
## CCN1 4.335389e-73 -11.86982 0.000 0.150 1.657159e-68
## SNHG29 7.046366e-110 -12.33631 0.000 0.218 2.693403e-105
## IGFBP1 0.000000e+00 -15.96739 0.000 0.781 0.000000e+00
#proliferative phase marker genes
FeaturePlot(seurat, features = c("PGR", "ESR1"), reduction = "umap.cca",split.by = "condition", cols = c("lightgray", "red"))
#proliferative phase marker genes
FeaturePlot(seurat, features = c("IGF1", "MMP11"), reduction = "umap.cca",split.by = "condition", cols = c("lightgray", "red"))
#proliferative phase marker genes
FeaturePlot(seurat, features = c("CRABP2", "ECM1"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))
#secretory phase marker genes
FeaturePlot(seurat, features = c("PAEP","FOXO1"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))
top_and_bot(markers1, bot = FALSE)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ----------- ------ ----------- ------ ------ ----------
## AC006369.1 0 8.789679 0.018 0 0
## AL355581.1 0 8.453120 0.013 0 0
## C15orf53 0 8.191206 0.026 0 0
## LINC02195 0 8.007510 0.010 0 0
## TTC16 0 7.880154 0.018 0 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## AC006369.1 2.898012e-29 8.789679 0.018 0 1.107736e-24
## AL355581.1 8.966751e-22 8.453120 0.013 0 3.427451e-17
## C15orf53 2.503265e-40 8.191206 0.026 0 9.568479e-36
## LINC02195 4.821539e-16 8.007510 0.010 0 1.842985e-11
## TTC16 2.063818e-28 7.880154 0.018 0 7.888737e-24
#genes from find marker genes cluster 1 T- cell CD3D, TRAC, CD3E, TRBC2
FeaturePlot(seurat, features = c("CD3D","TRAC"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))
# PAEP and SPP1 (secretory phase genes) are up in endometriosis compared to Eutopic t-cells
# also up is COLGALT2 wich is overexressed in ovarian cancer
top_and_bot(cond_markers1)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ----------- ------ ----------- ------ ------ ----------
## IGLC2 0 9.968327 0.030 0.000 0.0005416
## SFRP2 0 9.153674 0.119 0.000 0.0000000
## AL031777.3 0 8.811295 0.071 0.000 0.0000000
## HEXDC 0 8.679404 0.103 0.000 0.0000000
## KIAA1147 0 8.453847 0.089 0.000 0.0000000
## SEPTIN7 0 -13.099985 0.000 0.444 0.0000000
## LINC-PINT 0 -13.176507 0.000 0.395 0.0000000
## IGFBP1 0 -14.788855 0.000 0.456 0.0000000
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## IGLC2 1.416840e-08 9.968327 0.030 0.000 5.415731e-04
## SFRP2 9.590463e-32 9.153674 0.119 0.000 3.665859e-27
## AL031777.3 3.829661e-19 8.811295 0.071 0.000 1.463850e-14
## HEXDC 1.697051e-27 8.679404 0.103 0.000 6.486809e-23
## KIAA1147 8.839429e-24 8.453847 0.089 0.000 3.378783e-19
## SEPTIN7 6.294724e-265 -13.099985 0.000 0.444 2.406095e-260
## LINC-PINT 2.288511e-232 -13.176506 0.000 0.395 8.747606e-228
## IGFBP1 3.192085e-273 -14.788855 0.000 0.456 1.220143e-268
FeaturePlot(seurat, features = c("IGFBP1","PAEP"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))
## Warning: All cells have the same value (0) of "IGFBP1"
top_and_bot(markers2, bot = FALSE)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ------- ------ ----------- ------ ------ ----------
## CD300E 0 11.76204 0.083 0 0
## HK3 0 10.60345 0.049 0 0
## CCL17 0 10.48961 0.015 0 0
## TREM2 0 10.28816 0.082 0 0
## VSTM1 0 10.26670 0.036 0 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## CD300E 1.485122e-175 11.76204 0.083 0 5.676732e-171
## HK3 5.434077e-103 10.60345 0.049 0 2.077122e-98
## CCL17 1.767388e-30 10.48961 0.015 0 6.755664e-26
## TREM2 2.669511e-166 10.28816 0.082 0 1.020394e-161
## VSTM1 6.220418e-77 10.26670 0.036 0 2.377693e-72
#genes from find marker genes cluster 2 macrophage
FeaturePlot(seurat, features = c("CD14","CD68"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))
top_and_bot(cond_markers2)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## --------- ---------- ----------- ------ ------ ----------
## HBA2 0.0000000 8.990105 0.324 0.006 0.0000000
## IGHG1 0.1039162 8.246955 0.015 0.000 1.0000000
## HLA-DQA2 0.0000000 8.095532 0.477 0.011 0.0000000
## S100A12 0.0000000 7.184686 0.177 0.011 0.0006754
## RBP7 0.0000002 6.990111 0.138 0.000 0.0067005
## DKK1 0.0000000 -10.066691 0.000 0.169 0.0000000
## IGFBP1 0.0000000 -13.413626 0.000 0.652 0.0000000
## PAEP 0.0000000 -13.857578 0.000 0.427 0.0000000
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## HBA2 2.480255e-17 8.990105 0.324 0.006 9.480525e-13
## IGHG1 1.039162e-01 8.246955 0.015 0.000 1.000000e+00
## HLA-DQA2 5.527340e-28 8.095532 0.477 0.011 2.112771e-23
## S100A12 1.766985e-08 7.184686 0.177 0.011 6.754125e-04
## RBP7 1.752951e-07 6.990111 0.138 0.000 6.700478e-03
## DKK1 1.645538e-22 -10.066691 0.000 0.169 6.289903e-18
## IGFBP1 1.195240e-92 -13.413626 0.000 0.652 4.568686e-88
## PAEP 5.384122e-58 -13.857578 0.000 0.427 2.058027e-53
#no large diff
FeaturePlot(seurat, features = c("HBA2","PAEP"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))
top_and_bot(markers3, bot = FALSE)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ----------- ------ ----------- ------ ------ ----------
## MSTN 0 8.647461 0.013 0.000 0
## AL591501.1 0 8.405994 0.016 0.000 0
## JAKMIP3 0 8.352714 0.011 0.000 0
## AC107029.2 0 8.193623 0.013 0.000 0
## SCN7A 0 6.244991 0.230 0.002 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## MSTN 3.682901e-28 8.647461 0.013 0.000 1.407752e-23
## AL591501.1 8.692087e-35 8.405994 0.016 0.000 3.322464e-30
## JAKMIP3 7.645124e-25 8.352714 0.011 0.000 2.922272e-20
## AC107029.2 3.682902e-28 8.193623 0.013 0.000 1.407753e-23
## SCN7A 0.000000e+00 6.244991 0.230 0.002 0.000000e+00
#genes from find marker genes cluster 3
FeaturePlot(seurat, features = c("MSTN","AL591501.1"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))
## Warning: All cells have the same value (0) of "MSTN"
## Warning: All cells have the same value (0) of "AL591501.1"
top_and_bot(cond_markers3)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## -------- ------ ----------- ------ ------ ----------
## WISP2 0e+00 9.762120 0.451 0.000 0.0000000
## HBB 0e+00 8.761504 0.348 0.013 0.0000000
## IGKC 2e-07 8.718460 0.155 0.000 0.0088675
## GPC3 0e+00 7.824269 0.236 0.000 0.0000017
## NTRK2 0e+00 7.464346 0.226 0.000 0.0000055
## SNHG29 0e+00 -11.348904 0.000 0.601 0.0000000
## SEPTIN7 0e+00 -11.586616 0.000 0.614 0.0000000
## CCN1 0e+00 -12.259395 0.000 0.549 0.0000000
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## WISP2 2.524869e-23 9.762120 0.451 0.000 9.651061e-19
## HBB 5.762980e-16 8.761504 0.348 0.013 2.202841e-11
## IGKC 2.319871e-07 8.718460 0.155 0.000 8.867476e-03
## GPC3 4.557862e-11 7.824269 0.236 0.000 1.742197e-06
## NTRK2 1.436663e-10 7.464345 0.226 0.000 5.491500e-06
## SNHG29 4.165860e-75 -11.348904 0.000 0.601 1.592358e-70
## SEPTIN7 6.033710e-77 -11.586616 0.000 0.614 2.306325e-72
## CCN1 7.295654e-68 -12.259395 0.000 0.549 2.788691e-63
#big diff
FeaturePlot(seurat, features = c("WISP2","CCN1"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))
## Warning: All cells have the same value (0) of "CCN1"
## Warning: All cells have the same value (0) of "WISP2"
top_and_bot(markers4, bot = FALSE)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ----------- ------ ----------- ------ ------ ----------
## LINC00636 0 10.338143 0.026 0 0
## AC093909.4 0 10.217307 0.017 0 0
## DIPK2B 0 9.718333 0.096 0 0
## ARHGEF15 0 8.501079 0.061 0 0
## LINC01235 0 8.279611 0.022 0 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## LINC00636 7.887366e-58 10.338143 0.026 0 3.014867e-53
## AC093909.4 3.967643e-39 10.217307 0.017 0 1.516592e-34
## DIPK2B 7.408693e-198 9.718333 0.096 0 2.831899e-193
## ARHGEF15 1.092423e-127 8.501079 0.061 0 4.175677e-123
## LINC01235 5.485774e-40 8.279611 0.022 0 2.096882e-35
#cluster 4
FeaturePlot(seurat, features = c("LINC00636","AC093909.4"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))
top_and_bot(cond_markers4)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ------- ---------- ----------- ------ ------ ----------
## SFRP2 0.0000001 8.755766 0.145 0.000 0.0027481
## IGKC 0.0000000 8.533294 0.156 0.000 0.0008023
## NTS 0.0000027 8.342142 0.113 0.000 0.1023558
## DES 0.0001332 8.158068 0.076 0.000 1.0000000
## HBA2 0.0000000 8.130005 0.338 0.038 0.0000000
## CCN2 0.0000000 -10.645983 0.000 0.240 0.0000000
## IGFBP1 0.0000000 -11.629046 0.000 0.530 0.0000000
## PAEP 0.0000000 -12.346616 0.000 0.273 0.0000000
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## SFRP2 7.189530e-08 8.755766 0.145 0.000 2.748126e-03
## IGKC 2.099052e-08 8.533294 0.156 0.000 8.023416e-04
## NTS 2.677790e-06 8.342142 0.113 0.000 1.023558e-01
## DES 1.332102e-04 8.158068 0.076 0.000 1.000000e+00
## HBA2 6.852031e-15 8.130005 0.338 0.038 2.619121e-10
## CCN2 1.487566e-17 -10.645982 0.000 0.240 5.686071e-13
## IGFBP1 2.815495e-41 -11.629046 0.000 0.530 1.076195e-36
## PAEP 5.544995e-20 -12.346615 0.000 0.273 2.119519e-15
#cluster 4
FeaturePlot(seurat, features = c("SFRP2","PAEP"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))
top_and_bot(markers5, bot = FALSE)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## -------- ------ ----------- ------ ------ ----------
## IGHD 0 12.75202 0.161 0.000 0
## IGHA2 0 12.59622 0.094 0.000 0
## FAM129C 0 12.53100 0.245 0.000 0
## CD79A 0 11.98159 0.656 0.001 0
## TLR10 0 11.91428 0.214 0.000 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## IGHD 3.319619e-172 12.75202 0.161 0.000 1.268891e-167
## IGHA2 1.441515e-100 12.59622 0.094 0.000 5.510045e-96
## FAM129C 7.854885e-261 12.53100 0.245 0.000 3.002451e-256
## CD79A 0.000000e+00 11.98159 0.656 0.001 0.000000e+00
## TLR10 1.555762e-227 11.91428 0.214 0.000 5.946743e-223
#cluster 5
FeaturePlot(seurat, features = c("IGHD","IGHA2"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))
top_and_bot(cond_markers5)
##
##
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## ------- ---------- ----------- ------ ------ ----------
## IGLC3 0.0407866 10.281241 0.195 0.000 1
## IGHG1 0.0038076 8.930956 0.437 0.111 1
## IGLC2 0.0366354 8.328134 0.351 0.111 1
## IGHG2 0.0624600 7.605519 0.167 0.000 1
## IGHG3 0.0240335 7.429883 0.230 0.000 1
## GAS5 0.0000000 -9.730238 0.000 0.444 0
## PAEP 0.0000000 -11.551318 0.000 0.333 0
## IGFBP1 0.0000000 -11.734239 0.000 0.556 0
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## IGLC3 4.078657e-02 10.281241 0.195 0.000 1.000000e+00
## IGHG1 3.807593e-03 8.930956 0.437 0.111 1.000000e+00
## IGLC2 3.663540e-02 8.328134 0.351 0.111 1.000000e+00
## IGHG2 6.246001e-02 7.605519 0.167 0.000 1.000000e+00
## IGHG3 2.403353e-02 7.429883 0.230 0.000 1.000000e+00
## GAS5 3.533936e-19 -9.730238 0.000 0.444 1.350812e-14
## PAEP 1.270008e-14 -11.551318 0.000 0.333 4.854477e-10
## IGFBP1 8.165717e-24 -11.734239 0.000 0.556 3.121264e-19
#cluster 5
FeaturePlot(seurat, features = c("IGLC3","IGHG1"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))
## Warning: All cells have the same value (0) of "IGLC3"