# In this script I will remove non-T cells from PBMC

1. load libraries

2. Load Seurat Object


# Load the saved workspace file holding the Seurat object merged from the cell
# lines and a control (PBMC) after filtration. load() restores objects under
# the names they were saved with; the code below expects `All_samples_Merged`.
load("0-imp_Robj/All_Samples_Merged_with_10x_Azitmuth_Annotated_SCT_HPC_without_harmony_integration_removed_nonCD4cells_from_control_and_Bcells_from_L4.robj")


# Print a summary of the loaded object.
All_samples_Merged
An object of class Seurat 
62931 features across 49388 samples within 6 assays 
Active assay: SCT (26179 features, 3000 variable features)
 3 layers present: counts, data, scale.data
 5 other assays present: RNA, ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
 4 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap

3. Cell type Distribution to check clusters

# Cross-tabulate predicted cell types (rows) by cell_line (columns).
# The same tables can be regenerated later on the R object and compared when
# deciding on a resolution.

# Azimuth l1
janitor::tabyl(All_samples_Merged@meta.data, predicted.celltype.l1, cell_line)
 predicted.celltype.l1   L1   L2   L3   L4   L5   L6   L7 PBMC PBMC_10x
                     B    0    0   12    4   35   85  121    0        0
                 CD4 T 5771 3124 6351 5967 4914 4575 4634 5171     3505
                 CD8 T   13   25    0    1    4    5    3    0        0
                    DC    0    0    1    4   29   12   41    0        0
                  Mono    0    0    1   13    3    0    5    0        0
                    NK   38 2784    6   25   11  259   38    0        0
                 other    0    0   57    8 1025  208  487    0        0
               other T    3    2    0    1    1    4    2    0        0
# Azimuth l2: predicted.celltype.l2 labels (rows) by cell_line (columns)
janitor::tabyl(All_samples_Merged@meta.data, predicted.celltype.l2, cell_line)
 predicted.celltype.l2   L1   L2   L3   L4   L5   L6   L7 PBMC PBMC_10x
        B intermediate    0    0    2    1    2    2    0    0        0
              B memory    0    0   11    1   38   82  120    0        0
             CD14 Mono    0    0    1   14    5    0    6    0        0
               CD4 CTL    0    0    0    0    0    0    0   12        1
             CD4 Naive    0    0    0    7    0    0    0  523     1512
     CD4 Proliferating 2461 2852 5452 5391 4732 4002 4115    0        6
               CD4 TCM 3320  270  887  562  178  557  517 4576     1963
               CD4 TEM    1    0    0    0    0    0    0   60       23
     CD8 Proliferating    0    0    0    0    0    1    1    0        0
               CD8 TCM    1   16    0    0    0    0    0    0        0
               CD8 TEM    1    8    0    0    2    3    1    0        0
                  cDC1    0    0    0    0    2    6    0    0        0
                  cDC2    0    0    0    4   11    3   35    0        0
                   dnT    2    3    0    1    2    5    2    0        0
                  HSPC    0    0   60    7 1035  213  490    0        0
                   ILC    0    0    0    1    0    0    0    0        0
                    NK    0    0    0    1    0    0    0    0        0
      NK Proliferating   38 2785    6   24   11  259   38    0        0
                  Treg    1    1    9    9    4   15    6    0        0
# Azimuth l3: predicted.celltype.l3 labels (rows) by cell_line (columns)
janitor::tabyl(All_samples_Merged@meta.data, predicted.celltype.l3, cell_line)
 predicted.celltype.l3   L1   L2   L3   L4   L5   L6   L7 PBMC PBMC_10x
 B intermediate lambda    0    0    2    0    4    2    3    0        0
        B memory kappa    0    0    7    0    7   22   55    0        0
       B memory lambda    0    0    3    1    7   29   26    0        0
             CD14 Mono    0    0    1   15    9    0   22    0        0
               CD4 CTL    0    0    0    0    0    0    0   14        1
             CD4 Naive    0    0    0    9    0    0    0  526     1524
     CD4 Proliferating 2462 2852 5452 5391 4732 4003 4115    0        6
             CD4 TCM_1 1932    6    6   35    0    7    3 4038     1447
             CD4 TCM_2  652  250  870  516  165  536  482  265      144
             CD4 TCM_3  436   12    4    6   13    7   26  265      359
             CD4 TEM_1    1    0    0    0    0    0    0    4        7
             CD4 TEM_2    0    0    0    0    0    0    0   15       16
             CD4 TEM_3    0    0    0    0    0    0    0   44        1
           CD8 Naive_2    0    0    0    0    1    1    0    0        0
     CD8 Proliferating    0    0    0    0    0    1    1    0        0
             CD8 TCM_1    0    8    0    0    0    0    0    0        0
             CD8 TCM_2  298    6    0    0    0    0    0    0        0
             CD8 TCM_3    0    1    0    0    0    0    0    0        0
             CD8 TEM_2    0    0    0    0    0    1    0    0        0
             CD8 TEM_6    1   10    0    0    1    1    1    0        0
                  cDC1    0    0    0    1   17   36   16    0        0
                cDC2_1    0    0    0    1    1    0    4    0        0
                cDC2_2    0    0    1    3   12    4   33    0        0
                 dnT_2    2    3    0    2    2    6    2    0        0
                  HSPC    0    0   62    7 1036  214  493    0        0
                   ILC    0    0    1    3    0    0    0    0        0
      NK Proliferating   38 2785    6   24   11  259   38    0        0
           Treg Memory    3    2   13    9    4   19   11    0        0

4. Filter cells to keep only CD4 T cells



# Use the "cell_line" metadata column as the cell identities.
Idents(All_samples_Merged) <- "cell_line"



# Drop cells whose Azimuth level-2 label is exactly "ILC" or "NK".
# NOTE(review): only these two exact labels are excluded; per the tables in
# this notebook that is one cell each (both in L4). Other non-CD4 labels such
# as "NK Proliferating", "B memory" and "HSPC" are still present in
# filtered_seurat -- confirm this matches the "keep CD4 T cells" intent.
# NOTE(review): `!=` yields NA for a missing l2 label; how subset() treats such
# cells is not visible here -- confirm no labels are NA.
filtered_seurat <- subset(All_samples_Merged, subset = predicted.celltype.l2 != "ILC" & predicted.celltype.l2 != "NK")




library(ggplot2)
library(RColorBrewer)

# Count the cells remaining in each cell line after filtering. table() on the
# cell_line metadata gives one row per cell line; the count column holds the
# number of cells (not UMIs), so it is named accordingly.
data <- as.data.frame(table(filtered_seurat$cell_line))
colnames(data) <- c("cell_line", "n_cells")

# Build one fill colour per cell line instead of hard-coding the palette size.
# brewer.pal() offers 3 to 12 colours for "Set3": request at least 3 (smaller
# requests raise a warning) and interpolate when there are more than 12 lines.
n_lines <- nrow(data)
cell_line_colors <- brewer.pal(min(max(n_lines, 3), 12), "Set3")
if (n_lines > 12) {
  cell_line_colors <- colorRampPalette(cell_line_colors)(n_lines)
}

# Bar chart of cells per cell line, with the exact count printed above each bar.
ncells <- ggplot(data, aes(x = cell_line, y = n_cells, fill = cell_line)) +
  geom_col() +
  theme_classic() +
  geom_text(
    aes(label = n_cells),
    position = position_dodge(width = 0.9),
    vjust = -0.25  # nudge the label just above the top of the bar
  ) +
  scale_fill_manual(values = cell_line_colors) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)  # centre the title
  ) +
  ggtitle("Filtered cells per sample") +
  xlab("Cell lines") +
  ylab("Frequency")

print(ncells)

NA
NA
NA

5. Cell type distribution after filtering


# Cross-tabulate predicted cell types (rows) by cell_line (columns) on the
# filtered object. The same tables can be regenerated later on the R object and
# compared when deciding on a resolution.

# Azimuth l1
janitor::tabyl(filtered_seurat@meta.data, predicted.celltype.l1, cell_line)
 predicted.celltype.l1   L1   L2   L3   L4   L5   L6   L7 PBMC PBMC_10x
                     B    0    0   12    4   35   85  121    0        0
                 CD4 T 5771 3124 6351 5967 4914 4575 4634 5171     3505
                 CD8 T   13   25    0    1    4    5    3    0        0
                    DC    0    0    1    4   29   12   41    0        0
                  Mono    0    0    1   13    3    0    5    0        0
                    NK   38 2784    6   24   11  259   38    0        0
                 other    0    0   57    7 1025  208  487    0        0
               other T    3    2    0    1    1    4    2    0        0
# Azimuth l2: predicted.celltype.l2 labels (rows) by cell_line (columns)
janitor::tabyl(filtered_seurat@meta.data, predicted.celltype.l2, cell_line)
 predicted.celltype.l2   L1   L2   L3   L4   L5   L6   L7 PBMC PBMC_10x
        B intermediate    0    0    2    1    2    2    0    0        0
              B memory    0    0   11    1   38   82  120    0        0
             CD14 Mono    0    0    1   14    5    0    6    0        0
               CD4 CTL    0    0    0    0    0    0    0   12        1
             CD4 Naive    0    0    0    7    0    0    0  523     1512
     CD4 Proliferating 2461 2852 5452 5391 4732 4002 4115    0        6
               CD4 TCM 3320  270  887  562  178  557  517 4576     1963
               CD4 TEM    1    0    0    0    0    0    0   60       23
     CD8 Proliferating    0    0    0    0    0    1    1    0        0
               CD8 TCM    1   16    0    0    0    0    0    0        0
               CD8 TEM    1    8    0    0    2    3    1    0        0
                  cDC1    0    0    0    0    2    6    0    0        0
                  cDC2    0    0    0    4   11    3   35    0        0
                   dnT    2    3    0    1    2    5    2    0        0
                  HSPC    0    0   60    7 1035  213  490    0        0
      NK Proliferating   38 2785    6   24   11  259   38    0        0
                  Treg    1    1    9    9    4   15    6    0        0
# Azimuth l3: predicted.celltype.l3 labels (rows) by cell_line (columns)
janitor::tabyl(filtered_seurat@meta.data, predicted.celltype.l3, cell_line)
 predicted.celltype.l3   L1   L2   L3   L4   L5   L6   L7 PBMC PBMC_10x
 B intermediate lambda    0    0    2    0    4    2    3    0        0
        B memory kappa    0    0    7    0    7   22   55    0        0
       B memory lambda    0    0    3    1    7   29   26    0        0
             CD14 Mono    0    0    1   14    9    0   22    0        0
               CD4 CTL    0    0    0    0    0    0    0   14        1
             CD4 Naive    0    0    0    9    0    0    0  526     1524
     CD4 Proliferating 2462 2852 5452 5391 4732 4003 4115    0        6
             CD4 TCM_1 1932    6    6   35    0    7    3 4038     1447
             CD4 TCM_2  652  250  870  516  165  536  482  265      144
             CD4 TCM_3  436   12    4    6   13    7   26  265      359
             CD4 TEM_1    1    0    0    0    0    0    0    4        7
             CD4 TEM_2    0    0    0    0    0    0    0   15       16
             CD4 TEM_3    0    0    0    0    0    0    0   44        1
           CD8 Naive_2    0    0    0    0    1    1    0    0        0
     CD8 Proliferating    0    0    0    0    0    1    1    0        0
             CD8 TCM_1    0    8    0    0    0    0    0    0        0
             CD8 TCM_2  298    6    0    0    0    0    0    0        0
             CD8 TCM_3    0    1    0    0    0    0    0    0        0
             CD8 TEM_2    0    0    0    0    0    1    0    0        0
             CD8 TEM_6    1   10    0    0    1    1    1    0        0
                  cDC1    0    0    0    1   17   36   16    0        0
                cDC2_1    0    0    0    1    1    0    4    0        0
                cDC2_2    0    0    1    3   12    4   33    0        0
                 dnT_2    2    3    0    2    2    6    2    0        0
                  HSPC    0    0   62    7 1036  214  493    0        0
                   ILC    0    0    1    2    0    0    0    0        0
      NK Proliferating   38 2785    6   24   11  259   38    0        0
           Treg Memory    3    2   13    9    4   19   11    0        0

6. Save the Seurat object as an Robj file

LS0tCnRpdGxlOiAidXNlIEFubm90YXRlZCBSb2JqIGluY2x1ZGluZyBQQk1DMTB4IHRvIHJlbW92ZSBJTEMgYW5kIE5LLWp1c3Qgb25lIENlbGwiCmF1dGhvcjogTmFzaXIgTWFobW9vZCBBYmJhc2kKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6CiAgIyBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQKICAjIHdvcmRfZG9jdW1lbnQ6IGRlZmF1bHQKICAjIGh0bWxfZG9jdW1lbnQ6IGRlZmF1bHQKICAjcm1kZm9ybWF0czo6cmVhZHRoZWRvd24KICBodG1sX25vdGVib29rOgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIHRvY19jb2xsYXBzZWQ6IHRydWUKLS0tCgoKCiNJbiB0aGlzIHNjcmlwdCBJIHdpbGwgSSB3aWxsIHJlbW92ZSBOb24gVCBjZWxscyBmcm9tIFBCTUMgCgojIDEuIGxvYWQgbGlicmFyaWVzCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQoKbGlicmFyeShTZXVyYXQpCmxpYnJhcnkoU2V1cmF0T2JqZWN0KQpsaWJyYXJ5KFNldXJhdERhdGEpCmxpYnJhcnkocGF0Y2h3b3JrKQoKbGlicmFyeShkcGx5cikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShSQ29sb3JCcmV3ZXIpCmxpYnJhcnkobWFncml0dHIpCmxpYnJhcnkoZGJwbHlyKQpsaWJyYXJ5KHJtYXJrZG93bikKbGlicmFyeShrbml0cikKbGlicmFyeSh0aW55dGV4KQojQXppbXV0aCBBbm5vdGF0aW9uIGxpYnJhcmllcwpsaWJyYXJ5KEF6aW11dGgpCiNQcm9qZWNUaWxzIEFubm90YXRpb24gbGlicmFyaWVzCmxpYnJhcnkoU1RBQ0FTKQpsaWJyYXJ5KFByb2plY1RJTHMpCiNzaW5nbGVSIEFubm90YXRpb24gbGlicmFyaWVzCmxpYnJhcnkoU2luZ2xlUikKbGlicmFyeShjZWxsZGV4KQpsaWJyYXJ5KFNpbmdsZUNlbGxFeHBlcmltZW50KQoKbGlicmFyeShjbHVzdHJlZSkKCmBgYAoKCiMgMi4gTG9hZCBTZXVyYXQgT2JqZWN0IApgYGB7ciBsb2FkX3NldXJhdH0KCiNMb2FkIFNldXJhdCBPYmplY3QgbWVyZ2VkIGZyb20gY2VsbCBsaW5lcyBhbmQgYSBjb250cm9sKFBCTUMpIGFmdGVyIGZpbHRyYXRpb24KbG9hZCgiMC1pbXBfUm9iai9BbGxfU2FtcGxlc19NZXJnZWRfd2l0aF8xMHhfQXppdG11dGhfQW5ub3RhdGVkX1NDVF9IUENfd2l0aG91dF9oYXJtb255X2ludGVncmF0aW9uX3JlbW92ZWRfbm9uQ0Q0Y2VsbHNfZnJvbV9jb250cm9sX2FuZF9CY2VsbHNfZnJvbV9MNC5yb2JqIikKCgpBbGxfc2FtcGxlc19NZXJnZWQKCgpgYGAKCiMgMy4gQ2VsbCB0eXBlIERpc3RyaWJ1dGlvbiB0byBjaGVjayBjbHVzdGVycwpgYGB7ciBEaXN0cmlidXRpb24xLCBmaWcuaGVpZ2h0PTEwLCBmaWcud2lkdGg9MTB9CiMgV2UgY2FuIGFwcGx5IGl0IGxhdGVyIG9uIFIgb2JqIHRvIGdldCB0aGVzZSB0YWJsZXMgdG8gY29tcGFyZSBpdCB0byBkZWNpZGUgYWJvdXQgcmVzb2x1dGlvbi4KCiMgQXppbXV0aCBsMQpqYW5pdG9yOjp0YWJ5bChBbGxfc2FtcGxlc19NZXJnZWRAbWV0YS5kYXRhLCBwcmVkaWN0ZWQuY2VsbHR5cGUubDEs
IGNlbGxfbGluZSkKCgojIEF6aW11dGggbDIKamFuaXRvcjo6dGFieWwoQWxsX3NhbXBsZXNfTWVyZ2VkQG1ldGEuZGF0YSwgcHJlZGljdGVkLmNlbGx0eXBlLmwyLCBjZWxsX2xpbmUpCgojIEF6aW11dGggbDMKamFuaXRvcjo6dGFieWwoQWxsX3NhbXBsZXNfTWVyZ2VkQG1ldGEuZGF0YSwgcHJlZGljdGVkLmNlbGx0eXBlLmwzLCBjZWxsX2xpbmUpCgpgYGAKCiMgNC4gZmlsdGVyIGNlbGxzIGp1c3Qga2VlcCBDRDQgVCBjZWxscwpgYGB7ciBEaXN0cmlidXRpb24sIGZpZy5oZWlnaHQ9NiwgZmlnLndpZHRoPTEwfQoKCiMgU2V0IGlkZW50aXR5IHRvIGNlbGxfbGluZSAKSWRlbnRzKEFsbF9zYW1wbGVzX01lcmdlZCkgPC0gImNlbGxfbGluZSIKCgoKIyBSZW1vdmUgSUxDIGFuZCBOSyBjZWxscyBiYXNlZCBvbiBwcmVkaWN0ZWQuY2VsbHR5cGUubDIKZmlsdGVyZWRfc2V1cmF0IDwtIHN1YnNldChBbGxfc2FtcGxlc19NZXJnZWQsIHN1YnNldCA9IHByZWRpY3RlZC5jZWxsdHlwZS5sMiAhPSAiSUxDIiAmIHByZWRpY3RlZC5jZWxsdHlwZS5sMiAhPSAiTksiKQoKCgoKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KFJDb2xvckJyZXdlcikgIAoKIyBBc3N1bWluZyB5b3UgaGF2ZSAxMCBkaWZmZXJlbnQgY2VsbCBsaW5lcywgZ2VuZXJhdGluZyBhIGNvbG9yIHBhbGV0dGUgd2l0aCAxMCBjb2xvcnMKY2VsbF9saW5lX2NvbG9ycyA8LSBicmV3ZXIucGFsKDEwLCAiU2V0MyIpCgojIEFzc3VtaW5nIEFsbF9zYW1wbGVzX01lcmdlZCRjZWxsX2xpbmUgaXMgYSBmYWN0b3Igb3IgY2hhcmFjdGVyIHZlY3RvciBjb250YWluaW5nIGNlbGwgbGluZSBuYW1lcwpkYXRhIDwtIGFzLmRhdGEuZnJhbWUodGFibGUoZmlsdGVyZWRfc2V1cmF0JGNlbGxfbGluZSkpCmNvbG5hbWVzKGRhdGEpIDwtIGMoImNlbGxfbGluZSIsICJuVU1JIikgICMgQ2hhbmdlIGNvbHVtbiBuYW1lIHRvIG5VTUkKCm5jZWxscyA8LSBnZ3Bsb3QoZGF0YSwgYWVzKHggPSBjZWxsX2xpbmUsIHkgPSBuVU1JLCBmaWxsID0gY2VsbF9saW5lKSkgKyAKICBnZW9tX2NvbCgpICsKICB0aGVtZV9jbGFzc2ljKCkgKwogIGdlb21fdGV4dChhZXMobGFiZWwgPSBuVU1JKSwgCiAgICAgICAgICAgIHBvc2l0aW9uID0gcG9zaXRpb25fZG9kZ2Uod2lkdGggPSAwLjkpLCAKICAgICAgICAgICAgdmp1c3QgPSAtMC4yNSkgKwogIHNjYWxlX2ZpbGxfbWFudWFsKHZhbHVlcyA9IGNlbGxfbGluZV9jb2xvcnMpICsgCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSwgaGp1c3QgPSAxKSwKICAgICAgICBwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gMC41KSkgKyAgIyBBZGp1c3QgdGhlIHRpdGxlIHBvc2l0aW9uCiAgZ2d0aXRsZSgiRmlsdGVyZWQgY2VsbHMgcGVyIHNhbXBsZSIpICsKICB4bGFiKCJDZWxsIGxpbmVzIikgKyAgIyBBZGp1c3QgeC1heGlzIGxhYmVsCiAgeWxhYigiRnJlcXVlbmN5IikgICAgIyBBZGp1c3QgeS1heGlzIGxhYmVsCgpwcmludChu
Y2VsbHMpCgoKCmBgYAoKCgoKIyBDZWxsIHR5cGUgRGlzdHJpYnV0aW9uIHRvIGNoZWNrIGNsdXN0ZXJzCmBgYHtyIERpc3RyaWJ1dGlvbjIsIGZpZy5oZWlnaHQ9NiwgZmlnLndpZHRoPTEwfQoKIyBXZSBjYW4gYXBwbHkgaXQgbGF0ZXIgb24gUiBvYmogdG8gZ2V0IHRoZXNlIHRhYmxlcyB0byBjb21wYXJlIGl0IHRvIGRlY2lkZSBhYm91dCByZXNvbHV0aW9uLgoKIyBBemltdXRoIGwxCmphbml0b3I6OnRhYnlsKGZpbHRlcmVkX3NldXJhdEBtZXRhLmRhdGEsIHByZWRpY3RlZC5jZWxsdHlwZS5sMSwgY2VsbF9saW5lKQoKCmphbml0b3I6OnRhYnlsKGZpbHRlcmVkX3NldXJhdEBtZXRhLmRhdGEsIHByZWRpY3RlZC5jZWxsdHlwZS5sMiwgY2VsbF9saW5lKQoKCmphbml0b3I6OnRhYnlsKGZpbHRlcmVkX3NldXJhdEBtZXRhLmRhdGEsIHByZWRpY3RlZC5jZWxsdHlwZS5sMywgY2VsbF9saW5lKQoKYGBgCgoKIyAzLlNhdmUgdGhlIFNldXJhdCBvYmplY3QgYXMgYW4gUm9iaiBmaWxlCmBgYHtyIHNhdmVST0JKLCBlY2hvPUZBTFNFfQoKc2F2ZShmaWx0ZXJlZF9zZXVyYXQsIGZpbGUgPSAiMC1pbXBfUm9iai9TU19DRDRfVGNlbGxzX0F6aW11dGhfQW5ub3RhdGVkX1BCTUMxMHhfZXhjbHVkaW5nX25vbkNENF9jZWxsc19mcm9tX0NvbnRyb2xfQmNlbGxzX2Zyb21fTDRfYW5kX0lMQ19OS19qdXN0X29uZUNlbGwucm9iaiIpCgoKYGBgCgoKCgoKCgo=