cluster 0
Cluster 1
Cluster 2
Cluster 3
Cluster 4
Cluster 5

load in 2 samples of each condition so that this code can run on my computer

data_dir ="/Users/rorihoover/Desktop/BME494HW/Eutopic/sample30/outs/filtered_feature_bc_matrix/" 
data <- Read10X(data.dir = data_dir)
Eutopic_seurat = CreateSeuratObject(counts = data)

data_dir ="/Users/rorihoover/Desktop/BME494HW/Eutopic/sample31/outs/filtered_feature_bc_matrix/" 
data <- Read10X(data.dir = data_dir)
seurat_object = CreateSeuratObject(counts = data)
Eutopic_seurat<- merge(Eutopic_seurat, y = seurat_object)

## Warning: Some cell names are duplicated across objects provided. Renaming to
## enforce unique cell names.

Eutopic_seurat<- merge(Eutopic_seurat, y = seurat_object)
rm(seurat_object)
rm(data)

#saveRDS(Eutopic_seurat, file = "Eutopic_seurat.rds")

Length<- length(Cells(Eutopic_seurat))
condition <- list(rep("Eutopic_Endometrium", Length))
meta_data <- data.frame(condition)
rownames(meta_data)<- Cells(Eutopic_seurat)

Eutopic_seurat<- AddMetaData(
  object = Eutopic_seurat,
  metadata = meta_data,
  col.name = 'condition'
)
head(x = Eutopic_seurat[[]])

##                         orig.ident nCount_RNA nFeature_RNA           condition
## AAACCCAAGACCTTTG-1_1 SeuratProject       1057          523 Eutopic_Endometrium
## AAACCCAAGACTCTTG-1_1 SeuratProject       4052         1516 Eutopic_Endometrium
## AAACCCAAGAGAATCT-1_1 SeuratProject       4135         1424 Eutopic_Endometrium
## AAACCCAAGATGCAGC-1_1 SeuratProject        853          431 Eutopic_Endometrium
## AAACCCAAGATTCGCT-1_1 SeuratProject       5051         1823 Eutopic_Endometrium
## AAACCCAAGGTTGAGC-1_1 SeuratProject        750          422 Eutopic_Endometrium

data_dir ="/Users/rorihoover/Desktop/BME494HW/endo/sample7/outs/filtered_feature_bc_matrix/" 
data <- Read10X(data.dir = data_dir)
Endo_seurat = CreateSeuratObject(counts = data)

data_dir ="/Users/rorihoover/Desktop/BME494HW/endo/sample10/outs/filtered_feature_bc_matrix/"
data <- Read10X(data.dir = data_dir)
seurat_object = CreateSeuratObject(counts = data)
Endo_seurat<- merge(Endo_seurat, y = seurat_object)

## Warning: Some cell names are duplicated across objects provided. Renaming to
## enforce unique cell names.

rm(seurat_object)
rm(data)

#saveRDS(Endo_seurat, file = "Endo_seurat.rds")

Length<- length(Cells(Endo_seurat))

condition <- list(rep("Endometriosis", Length))
meta_data <- data.frame(condition)
rownames(meta_data)<- Cells(Endo_seurat)

Endo_seurat<- AddMetaData(
  object = Endo_seurat,
  metadata = meta_data,
  col.name = 'condition'
)
rm(meta_data)
rm(Length)
rm(condition)

seurat<- merge(Endo_seurat, y = Eutopic_seurat)

## Warning: Some cell names are duplicated across objects provided. Renaming to
## enforce unique cell names.

rm(Endo_seurat)
rm(Eutopic_seurat)

head(x = seurat[[]])

##                           orig.ident nCount_RNA nFeature_RNA     condition
## AAACCCAAGCAAATCA-1_1_1 SeuratProject        826          454 Endometriosis
## AAACCCAAGGCTGAAC-1_1_1 SeuratProject      10295         3064 Endometriosis
## AAACCCAAGTCGCGAA-1_1_1 SeuratProject       4798         1840 Endometriosis
## AAACCCACAAGGTCGA-1_1_1 SeuratProject       2669         1410 Endometriosis
## AAACCCACAATCCTTT-1_1_1 SeuratProject        582          241 Endometriosis
## AAACCCACACGAGAAC-1_1_1 SeuratProject       7856         2544 Endometriosis

seurat[["percent.mt"]] <- PercentageFeatureSet(seurat, pattern = "^MT-")
VlnPlot(seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "percent.mt")+
  geom_hline(yintercept = 5, linetype = "dashed", color = "black")

FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")+
  geom_hline(yintercept = 2500, linetype = "dashed", color = "black")+
  geom_hline(yintercept = 200, linetype = "dashed", color = "black")

seurat <- subset(seurat, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)

seurat[["percent.mt"]] <- PercentageFeatureSet(seurat, pattern = "^MT-")
VlnPlot(seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

#strange error
#seurat <- RunAzimuth(seurat, reference = "pbmcref")
#seurat

seurat <- NormalizeData(seurat)

## Normalizing layer: counts.1.1

## Normalizing layer: counts.1.1.2

## Normalizing layer: counts.2.1

## Normalizing layer: counts.2.2

## Normalizing layer: counts.2.1.2

seurat <- FindVariableFeatures(seurat)

## Finding variable features for layer counts.1.1

## Finding variable features for layer counts.1.1.2

## Finding variable features for layer counts.2.1

## Finding variable features for layer counts.2.2

## Finding variable features for layer counts.2.1.2

seurat <- ScaleData(seurat)

## Centering and scaling data matrix

seurat <- RunPCA(seurat)

## PC_ 1 
## Positive:  DCN, COL1A1, LUM, COL3A1, IGFBP7, COL6A2, SPARCL1, FBLN1, COL1A2, SPARC 
##     C1R, COL6A1, IGFBP4, MMP2, CALD1, C1S, SERPINF1, LGALS1, PCOLCE, TIMP3 
##     COL6A3, SERPING1, SELENOP, TIMP2, GSN, IGFBP6, CST3, AEBP1, COL14A1, MGP 
## Negative:  SRGN, CXCR4, CD52, CCL5, CD48, CORO1A, LAPTM5, RAC2, CST7, IL32 
##     CD2, KLRB1, GZMA, TRBC2, CD7, CD3D, TRAC, CD69, GZMK, RUNX3 
##     STK17B, FYB1, IL7R, NKG7, TSC22D3, SAMSN1, BTG1, ISG20, LTB, TRBC1 
## PC_ 2 
## Positive:  C11orf96, IGFBP1, RBP1, PAEP, APOE, RND3, IGFBP2, RAMP1, FOSB, TIMP3 
##     IGFBP3, NFKBIA, GNLY, ARC, IER3, TGM2, ATF3, GADD45G, IER2, TXN 
##     PPP1R15A, LEFTY2, VCAN, EGR1, RRAD, GADD45A, JUNB, SAT1, CXCL8, SERTAD1 
## Negative:  CCDC80, SFRP2, MGP, C7, SFRP1, S100A10, COL1A2, SFRP4, DPT, COL14A1 
##     GSN, CPE, MRC2, OGN, S100A4, IGFBP5, FGL2, PI16, CPVL, CFH 
##     C3, PLAC9, COLEC12, PLXDC2, MMP23B, PRELP, SVEP1, BOC, TNXB, PLTP 
## PC_ 3 
## Positive:  LYZ, MS4A6A, AIF1, HLA-DRA, CYBB, HLA-DRB1, MS4A7, HLA-DQA1, FCGR2A, FCN1 
##     HLA-DPA1, HLA-DMA, LST1, MNDA, HLA-DPB1, CLEC7A, HLA-DQB1, SPI1, CTSS, RNASE6 
##     CD68, TYROBP, CSF1R, IFI30, C5AR1, IL1B, CD86, NCF2, CD74, HLA-DMB 
## Negative:  TRAC, IL32, CD3D, MGP, CD2, TRBC2, CCL5, SFRP4, CCDC80, TRBC1 
##     SFRP2, SFRP1, IL7R, GZMA, HBB, GZMK, HBA2, LTB, KLRB1, PLAC9 
##     CD8A, IKZF3, HBA1, PTGDS, CD7, CD8B, ADIRF, DPT, ITM2A, CLEC2D 
## PC_ 4 
## Positive:  MALAT1, MT-CO1, CREM, NEAT1, MCL1, XCL2, CEMIP2, PIK3R1, HSP90AA1, ZFP36 
##     MYADM, XCL1, KLRD1, TSC22D3, FAM177A1, YPEL5, HSPH1, REL, SYTL3, HSPD1 
##     BTG1, CD7, WSB1, SRGN, NKG7, CXCR4, CRIP1, CST7, CHORDC1, CCL5 
## Negative:  HLA-DRA, AIF1, LYZ, FTL, S100A6, MS4A6A, HLA-DQA1, HLA-DPB1, HLA-DPA1, HLA-DQB1 
##     MNDA, HBB, CD74, LST1, FTH1, FCN1, SPI1, CYBB, TMSB10, CLEC10A 
##     S100A9, HLA-DRB1, HBA2, HLA-DMB, HLA-DMA, MPEG1, FCER1A, SERPINA1, LGALS2, LGALS1 
## PC_ 5 
## Positive:  CAVIN2, CLDN5, ECSCR, EGFL7, EMCN, MMRN1, TM4SF18, CCL21, RAMP2, TM4SF1 
##     TIE1, ESAM, CDH5, PROX1, TINAGL1, ARHGAP29, A2M, SMAD1, TFF3, PTPRB 
##     VWF, GNG11, ROBO4, AQP1, PECAM1, PCAT19, FABP4, CRIP2, ADGRL4, BCAM 
## Negative:  DCN, FBLN1, LUM, COL1A1, LGALS1, OGN, MMP2, SFRP2, C3, VCAN 
##     SPON2, COL3A1, CFD, SERPINF1, C1R, IGFBP6, SFRP1, PCOLCE, OLFML3, S100A4 
##     PTGDS, PI16, ISLR, MRC2, PDGFRL, CEBPB, C1S, GPNMB, CCDC80, S100A6

VizDimLoadings(seurat, dims = 1:2, reduction = "pca")

DimPlot(seurat, reduction = "pca", group.by = "condition")

ElbowPlot(seurat)

seurat <- FindNeighbors(seurat, dims = 1:10, reduction = "pca")

## Computing nearest neighbor graph

## Computing SNN

seurat<- FindClusters(seurat, resolution = 0.1, cluster.name = "unintegrated_clusters")

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10255
## Number of edges: 332360
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9734
## Number of communities: 9
## Elapsed time: 1 seconds

seurat <- RunUMAP(seurat, dims = 1:10, reduction = "pca", reduction.name = "umap.unintegrated")

## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session

## 23:36:07 UMAP embedding parameters a = 0.9922 b = 1.112

## 23:36:07 Read 10255 rows and found 10 numeric columns

## 23:36:07 Using Annoy for neighbor search, n_neighbors = 30

## 23:36:07 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 23:36:08 Writing NN index file to temp file /var/folders/hv/xklkw9n15xgbktqz58g7gtbw0000gn/T//Rtmp74Y4Pl/filede9c190742c0
## 23:36:08 Searching Annoy index using 1 thread, search_k = 3000
## 23:36:13 Annoy recall = 100%
## 23:36:14 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 23:36:15 Initializing from normalized Laplacian + noise (using RSpectra)
## 23:36:16 Commencing optimization for 200 epochs, with 410494 positive edges
## 23:36:24 Optimization finished

DimPlot(
  seurat,
  reduction = "umap.unintegrated",
  group.by = "condition",
  combine = FALSE, label.size = 2
)

## [[1]]

DimPlot(seurat, reduction = "umap.unintegrated")

# visualize by batch and cell type annotation
# cell type annotations were previously added by Azimuth
#DimPlot(seurat, reduction = "umap.unintegrated", group.by = "predicted.celltype.l2")

#this will take a long time 
seurat <- IntegrateLayers(object = seurat, method = CCAIntegration, new.reduction = "integrated.cca", verbose = F)

seurat <- FindNeighbors(seurat, reduction = "integrated.cca", dims = 1:10)

## Computing nearest neighbor graph

## Computing SNN

seurat <- FindClusters(seurat, resolution = 0.1, cluster.name = "integrated_clusters")

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10255
## Number of edges: 342359
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9510
## Number of communities: 6
## Elapsed time: 1 seconds

seurat <- RunUMAP(seurat, reduction = "integrated.cca", dims = 1:10, reduction.name = "umap.cca")

## 23:39:43 UMAP embedding parameters a = 0.9922 b = 1.112

## 23:39:43 Read 10255 rows and found 10 numeric columns

## 23:39:43 Using Annoy for neighbor search, n_neighbors = 30

## 23:39:43 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 23:39:44 Writing NN index file to temp file /var/folders/hv/xklkw9n15xgbktqz58g7gtbw0000gn/T//Rtmp74Y4Pl/filede9c320ad4a3
## 23:39:44 Searching Annoy index using 1 thread, search_k = 3000
## 23:39:49 Annoy recall = 100%
## 23:39:50 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 23:39:52 Initializing from normalized Laplacian + noise (using RSpectra)
## 23:39:52 Commencing optimization for 200 epochs, with 432952 positive edges
## 23:40:01 Optimization finished

DimPlot(
  seurat,
  reduction = "umap.cca",
  group.by = "condition",
  combine = FALSE, label.size = 2
)

## [[1]]

# DimPlot(
#   seurat,
#   reduction = "umap.cca",
#   group.by = "predicted.celltype.l2",
#   combine = FALSE, label.size = 2
# )

DimPlot(seurat, reduction = "umap.cca")

# re-join layers after integration
seurat[["RNA"]] <- JoinLayers(seurat[["RNA"]])
seurat

## An object of class Seurat 
## 38224 features across 10255 samples within 1 assay 
## Active assay: RNA (38224 features, 2000 variable features)
##  3 layers present: data, counts, scale.data
##  4 dimensional reductions calculated: pca, umap.unintegrated, integrated.cca, umap.cca

markers0 <- FindMarkers(
  object = seurat,   
  ident.1 = "0",    
  ident.2 = NULL, 
  only.pos = TRUE
)

markers1 <- FindMarkers(
  object = seurat,   
  ident.1 = "1",    
  ident.2 = NULL, 
  only.pos = TRUE
)

markers2 <- FindMarkers(
  object = seurat, 
  ident.1 = "2",      
  ident.2 = NULL, 
  only.pos = TRUE
)

markers3 <- FindMarkers(
  object = seurat,   
  ident.1 = "3",     
  ident.2 = NULL, 
  only.pos = TRUE
)

markers4 <- FindMarkers(
  object = seurat,    
  ident.1 = "4",     
  ident.2 = NULL, 
  only.pos = TRUE
)

markers5 <- FindMarkers(
  object = seurat,    
  ident.1 = "5",      
  ident.2 = "0", 
  only.pos = TRUE
)

seurat$cond_celltype <- paste(seurat$condition, seurat$integrated_clusters, sep = "_")
Idents(seurat) <- "cond_celltype"
table(seurat$cond_celltype)

## 
##       Endometriosis_0       Endometriosis_1       Endometriosis_2 
##                  1993                  2365                   543 
##       Endometriosis_3       Endometriosis_4       Endometriosis_5 
##                   483                   275                   174 
## Eutopic_Endometrium_0 Eutopic_Endometrium_1 Eutopic_Endometrium_2 
##                  2825                  1065                   178 
## Eutopic_Endometrium_3 Eutopic_Endometrium_4 Eutopic_Endometrium_5 
##                   153                   183                    18

cond_celltype_table <- table(seurat$cond_celltype)
cond_celltype_df <- as.data.frame(cond_celltype_table)
colnames(cond_celltype_df) <- c("Cluster", "Count")

cond_celltype_df <- cond_celltype_df %>%
  mutate(Condition = ifelse(grepl("Endometriosis", Cluster, ignore.case = TRUE),
                            "Endometriosis",
                            "Eutopic_Endometrium"))
cond_celltype_df$Cluster <- sub("^[^0-9]+", "", cond_celltype_df$Cluster)

print(cond_celltype_df)

##    Cluster Count           Condition
## 1        0  1993       Endometriosis
## 2        1  2365       Endometriosis
## 3        2   543       Endometriosis
## 4        3   483       Endometriosis
## 5        4   275       Endometriosis
## 6        5   174       Endometriosis
## 7        0  2825 Eutopic_Endometrium
## 8        1  1065 Eutopic_Endometrium
## 9        2   178 Eutopic_Endometrium
## 10       3   153 Eutopic_Endometrium
## 11       4   183 Eutopic_Endometrium
## 12       5    18 Eutopic_Endometrium

ggplot(cond_celltype_df, aes(x = Cluster, y = Count, fill = Condition)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Cells Present in Each Cluster", x = "Cluster", y = "Count")

perts <- cond_celltype_df %>%
  group_by(Condition) %>%
  mutate(Total = sum(Count), Percentage = (Count / Total) * 100) %>%
  select(Cluster, Count, Condition, Percentage)

head(perts, 12)

## # A tibble: 12 × 4
## # Groups:   Condition [2]
##    Cluster Count Condition           Percentage
##    <chr>   <int> <chr>                    <dbl>
##  1 0        1993 Endometriosis           34.2  
##  2 1        2365 Endometriosis           40.5  
##  3 2         543 Endometriosis            9.31 
##  4 3         483 Endometriosis            8.28 
##  5 4         275 Endometriosis            4.71 
##  6 5         174 Endometriosis            2.98 
##  7 0        2825 Eutopic_Endometrium     63.9  
##  8 1        1065 Eutopic_Endometrium     24.1  
##  9 2         178 Eutopic_Endometrium      4.03 
## 10 3         153 Eutopic_Endometrium      3.46 
## 11 4         183 Eutopic_Endometrium      4.14 
## 12 5          18 Eutopic_Endometrium      0.407

library(ggplot2)
library(dplyr)
endometriosis_data <- filter(perts, Condition == "Endometriosis")
ggplot(endometriosis_data, aes(x = "", y = Percentage, fill = factor(Cluster))) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y") +
  labs(title = "Endometriosis - Initial Cluster Distribution") +
  theme_void() +
  theme(legend.title = element_blank())

# Plot for Eutopic Endometrium
eutopic_data <- filter(perts, Condition == "Eutopic_Endometrium")
ggplot(eutopic_data, aes(x = "", y = Percentage, fill = factor(Cluster))) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y") +
  labs(title = "Eutopic Endometrium -  Initial Cluster Distribution") +
  theme_void() +
  theme(legend.title = element_blank())

cond_markers0 <- FindMarkers(seurat, ident.1 = "Endometriosis_0", ident.2 = "Eutopic_Endometrium_0", verbose = FALSE)

cond_markers1 <- FindMarkers(seurat, ident.1 = "Endometriosis_1", ident.2 = "Eutopic_Endometrium_1", verbose = FALSE)


cond_markers2 <- FindMarkers(seurat, ident.1 = "Endometriosis_2", ident.2 = "Eutopic_Endometrium_2", verbose = FALSE)

cond_markers3 <- FindMarkers(seurat, ident.1 = "Endometriosis_3", ident.2 = "Eutopic_Endometrium_3", verbose = FALSE)

cond_markers4 <- FindMarkers(seurat, ident.1 = "Endometriosis_4", ident.2 = "Eutopic_Endometrium_4", verbose = FALSE)

cond_markers5 <- FindMarkers(seurat, ident.1 = "Endometriosis_5", ident.2 = "Eutopic_Endometrium_5", verbose = FALSE)

library("knitr")
top_and_bot <- function(ldf, bot = TRUE) {
 
  ldf <- ldf[order(ldf$avg_log2FC, decreasing = TRUE), ]
    if (bot) {
        top_3 <- head(ldf, 3)
        result <- top_3
    }      
  top_5 <- head(ldf, 5)
  result <- top_5

  if (bot) {
    bottom_3 <- tail(ldf, 3)
    result <- rbind(result, bottom_3)
  }
  print(knitr::kable(result, format = "simple"))

  return(result)
}

cluster 0

top_and_bot(markers0, bot = FALSE)

## 
## 
##               p_val   avg_log2FC   pct.1   pct.2   p_val_adj
## -----------  ------  -----------  ------  ------  ----------
## PCSK1             0     8.694589   0.018   0.000           0
## C11orf53          0     8.134797   0.013   0.000           0
## AL117348.1        0     5.530758   0.017   0.000           0
## AL139393.3        0     5.187896   0.013   0.000           0
## AC005062.1        0     4.672949   0.021   0.001           0

##                   p_val avg_log2FC pct.1 pct.2    p_val_adj
## PCSK1      7.902303e-24   8.694589 0.018 0.000 3.020576e-19
## C11orf53   2.737425e-17   8.134797 0.013 0.000 1.046353e-12
## AL117348.1 3.187419e-20   5.530758 0.017 0.000 1.218359e-15
## AL139393.3 3.296785e-16   5.187896 0.013 0.000 1.260163e-11
## AC005062.1 1.799464e-24   4.672949 0.021 0.001 6.878272e-20

#genes from find marker genes cluster 0 
FeaturePlot(seurat, features = c("PCSK1","C11orf53"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))

## Warning: All cells have the same value (0) of "PCSK1"

top_and_bot(cond_markers0)

## 
## 
##           p_val   avg_log2FC   pct.1   pct.2   p_val_adj
## -------  ------  -----------  ------  ------  ----------
## IGHG1         0     13.13153   0.028   0.000           0
## WISP2         0     12.73632   0.130   0.000           0
## TPSAB1        0     12.10728   0.030   0.000           0
## SFRP2         0     10.98770   0.327   0.001           0
## IGLC3         0     10.89223   0.027   0.000           0
## CCN1          0    -11.86982   0.000   0.150           0
## SNHG29        0    -12.33631   0.000   0.218           0
## IGFBP1        0    -15.96739   0.000   0.781           0

##                p_val avg_log2FC pct.1 pct.2     p_val_adj
## IGHG1   3.233099e-19   13.13153 0.028 0.000  1.235820e-14
## WISP2   2.894087e-86   12.73632 0.130 0.000  1.106236e-81
## TPSAB1  1.713441e-20   12.10728 0.030 0.000  6.549459e-16
## SFRP2  8.563385e-231   10.98770 0.327 0.001 3.273268e-226
## IGLC3   1.403127e-18   10.89223 0.027 0.000  5.363313e-14
## CCN1    4.335389e-73  -11.86982 0.000 0.150  1.657159e-68
## SNHG29 7.046366e-110  -12.33631 0.000 0.218 2.693403e-105
## IGFBP1  0.000000e+00  -15.96739 0.000 0.781  0.000000e+00

#proliferative phase marker genes 
FeaturePlot(seurat, features = c("PGR", "ESR1"), reduction = "umap.cca",split.by = "condition", cols = c("lightgray", "red"))

#proliferative phase marker genes 
FeaturePlot(seurat, features = c("IGF1", "MMP11"), reduction = "umap.cca",split.by = "condition", cols = c("lightgray", "red"))

#proliferative phase marker genes 
FeaturePlot(seurat, features = c("CRABP2", "ECM1"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))

#secretory phase marker genes 
FeaturePlot(seurat, features = c("PAEP","FOXO1"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))

Cluster 1

top_and_bot(markers1, bot = FALSE)

## 
## 
##               p_val   avg_log2FC   pct.1   pct.2   p_val_adj
## -----------  ------  -----------  ------  ------  ----------
## AC006369.1        0     8.789679   0.018       0           0
## AL355581.1        0     8.453120   0.013       0           0
## C15orf53          0     8.191206   0.026       0           0
## LINC02195         0     8.007510   0.010       0           0
## TTC16             0     7.880154   0.018       0           0

##                   p_val avg_log2FC pct.1 pct.2    p_val_adj
## AC006369.1 2.898012e-29   8.789679 0.018     0 1.107736e-24
## AL355581.1 8.966751e-22   8.453120 0.013     0 3.427451e-17
## C15orf53   2.503265e-40   8.191206 0.026     0 9.568479e-36
## LINC02195  4.821539e-16   8.007510 0.010     0 1.842985e-11
## TTC16      2.063818e-28   7.880154 0.018     0 7.888737e-24

#genes from find marker genes cluster 1 T- cell CD3D, TRAC, CD3E, TRBC2
FeaturePlot(seurat, features = c("CD3D","TRAC"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))

# PAEP and SPP1 (secretory phase genes) are up in endometriosis compared to Eutopic t-cells
# also up is COLGALT2 wich is overexressed in ovarian cancer

top_and_bot(cond_markers1)

## 
## 
##               p_val   avg_log2FC   pct.1   pct.2   p_val_adj
## -----------  ------  -----------  ------  ------  ----------
## IGLC2             0     9.968327   0.030   0.000   0.0005416
## SFRP2             0     9.153674   0.119   0.000   0.0000000
## AL031777.3        0     8.811295   0.071   0.000   0.0000000
## HEXDC             0     8.679404   0.103   0.000   0.0000000
## KIAA1147          0     8.453847   0.089   0.000   0.0000000
## SEPTIN7           0   -13.099985   0.000   0.444   0.0000000
## LINC-PINT         0   -13.176507   0.000   0.395   0.0000000
## IGFBP1            0   -14.788855   0.000   0.456   0.0000000

##                    p_val avg_log2FC pct.1 pct.2     p_val_adj
## IGLC2       1.416840e-08   9.968327 0.030 0.000  5.415731e-04
## SFRP2       9.590463e-32   9.153674 0.119 0.000  3.665859e-27
## AL031777.3  3.829661e-19   8.811295 0.071 0.000  1.463850e-14
## HEXDC       1.697051e-27   8.679404 0.103 0.000  6.486809e-23
## KIAA1147    8.839429e-24   8.453847 0.089 0.000  3.378783e-19
## SEPTIN7    6.294724e-265 -13.099985 0.000 0.444 2.406095e-260
## LINC-PINT  2.288511e-232 -13.176506 0.000 0.395 8.747606e-228
## IGFBP1     3.192085e-273 -14.788855 0.000 0.456 1.220143e-268

FeaturePlot(seurat, features = c("IGFBP1","PAEP"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))

## Warning: All cells have the same value (0) of "IGFBP1"

Cluster 2

top_and_bot(markers2, bot = FALSE)

## 
## 
##           p_val   avg_log2FC   pct.1   pct.2   p_val_adj
## -------  ------  -----------  ------  ------  ----------
## CD300E        0     11.76204   0.083       0           0
## HK3           0     10.60345   0.049       0           0
## CCL17         0     10.48961   0.015       0           0
## TREM2         0     10.28816   0.082       0           0
## VSTM1         0     10.26670   0.036       0           0

##                p_val avg_log2FC pct.1 pct.2     p_val_adj
## CD300E 1.485122e-175   11.76204 0.083     0 5.676732e-171
## HK3    5.434077e-103   10.60345 0.049     0  2.077122e-98
## CCL17   1.767388e-30   10.48961 0.015     0  6.755664e-26
## TREM2  2.669511e-166   10.28816 0.082     0 1.020394e-161
## VSTM1   6.220418e-77   10.26670 0.036     0  2.377693e-72

#genes from find marker genes cluster 2 macrophage 

FeaturePlot(seurat, features = c("CD14","CD68"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))

top_and_bot(cond_markers2)

## 
## 
##                 p_val   avg_log2FC   pct.1   pct.2   p_val_adj
## ---------  ----------  -----------  ------  ------  ----------
## HBA2        0.0000000     8.990105   0.324   0.006   0.0000000
## IGHG1       0.1039162     8.246955   0.015   0.000   1.0000000
## HLA-DQA2    0.0000000     8.095532   0.477   0.011   0.0000000
## S100A12     0.0000000     7.184686   0.177   0.011   0.0006754
## RBP7        0.0000002     6.990111   0.138   0.000   0.0067005
## DKK1        0.0000000   -10.066691   0.000   0.169   0.0000000
## IGFBP1      0.0000000   -13.413626   0.000   0.652   0.0000000
## PAEP        0.0000000   -13.857578   0.000   0.427   0.0000000

##                 p_val avg_log2FC pct.1 pct.2    p_val_adj
## HBA2     2.480255e-17   8.990105 0.324 0.006 9.480525e-13
## IGHG1    1.039162e-01   8.246955 0.015 0.000 1.000000e+00
## HLA-DQA2 5.527340e-28   8.095532 0.477 0.011 2.112771e-23
## S100A12  1.766985e-08   7.184686 0.177 0.011 6.754125e-04
## RBP7     1.752951e-07   6.990111 0.138 0.000 6.700478e-03
## DKK1     1.645538e-22 -10.066691 0.000 0.169 6.289903e-18
## IGFBP1   1.195240e-92 -13.413626 0.000 0.652 4.568686e-88
## PAEP     5.384122e-58 -13.857578 0.000 0.427 2.058027e-53

#no large diff
FeaturePlot(seurat, features = c("HBA2","PAEP"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))

Cluster 3

top_and_bot(markers3, bot = FALSE)

## 
## 
##               p_val   avg_log2FC   pct.1   pct.2   p_val_adj
## -----------  ------  -----------  ------  ------  ----------
## MSTN              0     8.647461   0.013   0.000           0
## AL591501.1        0     8.405994   0.016   0.000           0
## JAKMIP3           0     8.352714   0.011   0.000           0
## AC107029.2        0     8.193623   0.013   0.000           0
## SCN7A             0     6.244991   0.230   0.002           0

##                   p_val avg_log2FC pct.1 pct.2    p_val_adj
## MSTN       3.682901e-28   8.647461 0.013 0.000 1.407752e-23
## AL591501.1 8.692087e-35   8.405994 0.016 0.000 3.322464e-30
## JAKMIP3    7.645124e-25   8.352714 0.011 0.000 2.922272e-20
## AC107029.2 3.682902e-28   8.193623 0.013 0.000 1.407753e-23
## SCN7A      0.000000e+00   6.244991 0.230 0.002 0.000000e+00

#genes from find marker genes cluster 3
FeaturePlot(seurat, features = c("MSTN","AL591501.1"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))

## Warning: All cells have the same value (0) of "MSTN"

## Warning: All cells have the same value (0) of "AL591501.1"

top_and_bot(cond_markers3)

## 
## 
##            p_val   avg_log2FC   pct.1   pct.2   p_val_adj
## --------  ------  -----------  ------  ------  ----------
## WISP2      0e+00     9.762120   0.451   0.000   0.0000000
## HBB        0e+00     8.761504   0.348   0.013   0.0000000
## IGKC       2e-07     8.718460   0.155   0.000   0.0088675
## GPC3       0e+00     7.824269   0.236   0.000   0.0000017
## NTRK2      0e+00     7.464346   0.226   0.000   0.0000055
## SNHG29     0e+00   -11.348904   0.000   0.601   0.0000000
## SEPTIN7    0e+00   -11.586616   0.000   0.614   0.0000000
## CCN1       0e+00   -12.259395   0.000   0.549   0.0000000

##                p_val avg_log2FC pct.1 pct.2    p_val_adj
## WISP2   2.524869e-23   9.762120 0.451 0.000 9.651061e-19
## HBB     5.762980e-16   8.761504 0.348 0.013 2.202841e-11
## IGKC    2.319871e-07   8.718460 0.155 0.000 8.867476e-03
## GPC3    4.557862e-11   7.824269 0.236 0.000 1.742197e-06
## NTRK2   1.436663e-10   7.464345 0.226 0.000 5.491500e-06
## SNHG29  4.165860e-75 -11.348904 0.000 0.601 1.592358e-70
## SEPTIN7 6.033710e-77 -11.586616 0.000 0.614 2.306325e-72
## CCN1    7.295654e-68 -12.259395 0.000 0.549 2.788691e-63

#big diff
FeaturePlot(seurat, features = c("WISP2","CCN1"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))

## Warning: All cells have the same value (0) of "CCN1"

## Warning: All cells have the same value (0) of "WISP2"

Cluster 4

top_and_bot(markers4, bot = FALSE)

## 
## 
##               p_val   avg_log2FC   pct.1   pct.2   p_val_adj
## -----------  ------  -----------  ------  ------  ----------
## LINC00636         0    10.338143   0.026       0           0
## AC093909.4        0    10.217307   0.017       0           0
## DIPK2B            0     9.718333   0.096       0           0
## ARHGEF15          0     8.501079   0.061       0           0
## LINC01235         0     8.279611   0.022       0           0

##                    p_val avg_log2FC pct.1 pct.2     p_val_adj
## LINC00636   7.887366e-58  10.338143 0.026     0  3.014867e-53
## AC093909.4  3.967643e-39  10.217307 0.017     0  1.516592e-34
## DIPK2B     7.408693e-198   9.718333 0.096     0 2.831899e-193
## ARHGEF15   1.092423e-127   8.501079 0.061     0 4.175677e-123
## LINC01235   5.485774e-40   8.279611 0.022     0  2.096882e-35

#cluster 4
FeaturePlot(seurat, features = c("LINC00636","AC093909.4"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))

top_and_bot(cond_markers4)

## 
## 
##               p_val   avg_log2FC   pct.1   pct.2   p_val_adj
## -------  ----------  -----------  ------  ------  ----------
## SFRP2     0.0000001     8.755766   0.145   0.000   0.0027481
## IGKC      0.0000000     8.533294   0.156   0.000   0.0008023
## NTS       0.0000027     8.342142   0.113   0.000   0.1023558
## DES       0.0001332     8.158068   0.076   0.000   1.0000000
## HBA2      0.0000000     8.130005   0.338   0.038   0.0000000
## CCN2      0.0000000   -10.645983   0.000   0.240   0.0000000
## IGFBP1    0.0000000   -11.629046   0.000   0.530   0.0000000
## PAEP      0.0000000   -12.346616   0.000   0.273   0.0000000

##               p_val avg_log2FC pct.1 pct.2    p_val_adj
## SFRP2  7.189530e-08   8.755766 0.145 0.000 2.748126e-03
## IGKC   2.099052e-08   8.533294 0.156 0.000 8.023416e-04
## NTS    2.677790e-06   8.342142 0.113 0.000 1.023558e-01
## DES    1.332102e-04   8.158068 0.076 0.000 1.000000e+00
## HBA2   6.852031e-15   8.130005 0.338 0.038 2.619121e-10
## CCN2   1.487566e-17 -10.645982 0.000 0.240 5.686071e-13
## IGFBP1 2.815495e-41 -11.629046 0.000 0.530 1.076195e-36
## PAEP   5.544995e-20 -12.346615 0.000 0.273 2.119519e-15

#cluster 4
FeaturePlot(seurat, features = c("SFRP2","PAEP"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))

Cluster 5

top_and_bot(markers5, bot = FALSE)

## 
## 
##            p_val   avg_log2FC   pct.1   pct.2   p_val_adj
## --------  ------  -----------  ------  ------  ----------
## IGHD           0     12.75202   0.161   0.000           0
## IGHA2          0     12.59622   0.094   0.000           0
## FAM129C        0     12.53100   0.245   0.000           0
## CD79A          0     11.98159   0.656   0.001           0
## TLR10          0     11.91428   0.214   0.000           0

##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
## IGHD    3.319619e-172   12.75202 0.161 0.000 1.268891e-167
## IGHA2   1.441515e-100   12.59622 0.094 0.000  5.510045e-96
## FAM129C 7.854885e-261   12.53100 0.245 0.000 3.002451e-256
## CD79A    0.000000e+00   11.98159 0.656 0.001  0.000000e+00
## TLR10   1.555762e-227   11.91428 0.214 0.000 5.946743e-223

#cluster 5
FeaturePlot(seurat, features = c("IGHD","IGHA2"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))

top_and_bot(cond_markers5)

## 
## 
##               p_val   avg_log2FC   pct.1   pct.2   p_val_adj
## -------  ----------  -----------  ------  ------  ----------
## IGLC3     0.0407866    10.281241   0.195   0.000           1
## IGHG1     0.0038076     8.930956   0.437   0.111           1
## IGLC2     0.0366354     8.328134   0.351   0.111           1
## IGHG2     0.0624600     7.605519   0.167   0.000           1
## IGHG3     0.0240335     7.429883   0.230   0.000           1
## GAS5      0.0000000    -9.730238   0.000   0.444           0
## PAEP      0.0000000   -11.551318   0.000   0.333           0
## IGFBP1    0.0000000   -11.734239   0.000   0.556           0

##               p_val avg_log2FC pct.1 pct.2    p_val_adj
## IGLC3  4.078657e-02  10.281241 0.195 0.000 1.000000e+00
## IGHG1  3.807593e-03   8.930956 0.437 0.111 1.000000e+00
## IGLC2  3.663540e-02   8.328134 0.351 0.111 1.000000e+00
## IGHG2  6.246001e-02   7.605519 0.167 0.000 1.000000e+00
## IGHG3  2.403353e-02   7.429883 0.230 0.000 1.000000e+00
## GAS5   3.533936e-19  -9.730238 0.000 0.444 1.350812e-14
## PAEP   1.270008e-14 -11.551318 0.000 0.333 4.854477e-10
## IGFBP1 8.165717e-24 -11.734239 0.000 0.556 3.121264e-19

#cluster 5
FeaturePlot(seurat, features = c("IGLC3","IGHG1"), reduction = "umap.cca", split.by = "condition", cols = c("lightgray", "red"))

## Warning: All cells have the same value (0) of "IGLC3"

Validation Round

Rori Hoover

2024-12-10

cluster 0

Cluster 1

Cluster 2

Cluster 3

Cluster 4

Cluster 5