1. load libraries
2. Load Seurat Object
# load("/home/bioinfo/Cluster_to_Computer_Transfer_files_folder/All_Normal-PBMC_Abnormal-cellLines_T_cells_Merged_Annotated_UMAP_on_Clusters_to_USE.Robj")
library(ape)
library(dendextend)
library(ggplot2)
library(dplyr)
# Read the per-cell inferCNV groupings. The first column becomes the row
# names; the code below treats those row names as cell identifiers.
groupings <- read.table(
  "L1_L2_P1_inferCNV/infercnv.observation_groupings.txt",
  header = TRUE, stringsAsFactors = FALSE, row.names = 1
)
# Fail early with a clear message instead of deep inside dist()/hclust().
stopifnot(
  nrow(groupings) >= 2,
  all(c("Dendrogram.Group", "Dendrogram.Color") %in% colnames(groupings))
)

# Encode each dendrogram group as an integer (the factor level index).
groupings$Dendrogram.Group_Numeric <-
  as.numeric(factor(groupings$Dendrogram.Group))

# Pairwise distances between the integer group codes. The vector is named so
# that the cell identifiers travel through dist() -> hclust() -> dendrogram
# and end up as tip labels in the Newick file (an unnamed vector would label
# the leaves 1..n instead).
# NOTE(review): this distance only reflects the ordering of the factor
# levels, and dist() needs memory quadratic in the number of cells.
dist_matrix <- dist(
  setNames(groupings$Dendrogram.Group_Numeric, rownames(groupings))
)

# Hierarchical clustering (hclust defaults) converted to a dendrogram.
dend <- as.dendrogram(hclust(dist_matrix))

# color_branches() applies `col` to the k clusters in left-to-right leaf
# order, so the colours are looked up in that order rather than in file
# order. Cells of one group are at distance 0 from each other and therefore
# form one contiguous run of leaves.
leaf_groups <- groupings$Dendrogram.Group[order.dendrogram(dend)]
groups_in_leaf_order <- unique(leaf_groups)
unique_colors <- groupings$Dendrogram.Color[
  match(groups_in_leaf_order, groupings$Dendrogram.Group)
]
dend <- color_branches(
  dend,
  k = length(groups_in_leaf_order),
  col = unique_colors
)

# Convert to Newick format and write to file.
newick <- write.tree(as.phylo(dend))
write(newick, file = "infercnv.observations_dendrogram.txt")

# Plot the dendrogram. Leaf labels are suppressed: there is one leaf per
# cell, and the labels overlap when there are many cells.
plot(dend, main = "Dendrogram of Cells", leaflab = "none")

# Assign each cell to a cell line from the prefix of its identifier.
cell_ids <- rownames(groupings)
cell_data <- data.frame(
  cell = cell_ids,
  cell_line = case_when(
    startsWith(cell_ids, "L1_") ~ "L1",
    startsWith(cell_ids, "L2_") ~ "L2",
    TRUE ~ "Other"
  )
)

# One tile per cell, filled by cell line.
p <- ggplot(cell_data, aes(x = 1, y = cell, fill = cell_line)) +
  geom_tile() +
  scale_fill_manual(
    values = c("L1" = "darkgreen", "L2" = "purple", "Other" = "gray")
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
  ) +
  labs(x = NULL, y = NULL, fill = "Cell Line", title = "Cell Line Distribution")

# Display the plot
print(p)

# Print summary
cat("Number of cells in L1:", sum(cell_data$cell_line == "L1"), "\n")
Number of cells in L1: 5825
cat("Number of cells in L2:", sum(cell_data$cell_line == "L2"), "\n")
Number of cells in L2: 5935
cat("Number of other cells:", sum(cell_data$cell_line == "Other"), "\n")
Number of other cells: 0
# Print the number of unique groups
cat("Number of unique Dendrogram Groups:", length(unique(groupings$Dendrogram.Group)), "\n")
Number of unique Dendrogram Groups: 119
3. Phylogeny of inferCNV
library(ggplot2)
library(dplyr)
library(tidyr)
library(ggdendro)
library(dendextend)
library(patchwork)
library(tibble)
# Read the heatmap thresholds (first column of the file).
# NOTE(review): `thresholds` is not used by the rest of this section.
thresholds <- read.table(
  "L1_L2_P1_inferCNV/infercnv.heatmap_thresholds.txt",
  header = FALSE
)$V1

# Read the per-cell inferCNV groupings; row names come from the first column.
groupings <- read.table(
  "L1_L2_P1_inferCNV/infercnv.observation_groupings.txt",
  header = TRUE, stringsAsFactors = FALSE, row.names = 1
)

# Tidy the groupings: row names become a `sample` column, columns get
# snake_case names, a patient label is derived from the sample prefix, and
# the dendrogram group is encoded as an integer (factor level index).
metadata <- groupings %>%
  rownames_to_column("sample") %>%
  rename(
    dendrogram_group = Dendrogram.Group,
    dendrogram_color = Dendrogram.Color,
    annotation_group = Annotation.Group,
    annotation_color = Annotation.Color
  ) %>%
  mutate(
    patient = case_when(
      grepl("^L1_|^L2_", sample) ~ "P1",
      grepl("^L3_|^L4_", sample) ~ "P2",
      grepl("^L5_|^L6_|^L7_", sample) ~ "P3",
      TRUE ~ NA_character_
    ),
    annotation_group = as.factor(annotation_group),
    dendrogram_group = as.numeric(as.factor(dendrogram_group))
  ) %>%
  filter(!is.na(dendrogram_group))

# hclust() needs at least two observations.
stopifnot(nrow(metadata) >= 2)

# Distances between the integer group codes; the vector is named so the
# dendrogram leaves carry the sample identifiers instead of 1..n.
dist_matrix <- dist(setNames(metadata$dendrogram_group, metadata$sample))

# Create a dendrogram
dend <- as.dendrogram(hclust(dist_matrix))

# color_branches() applies `col` to the k clusters in left-to-right leaf
# order, so the colours are looked up in that order rather than in file
# order.
leaf_order <- order.dendrogram(dend)
groups_in_leaf_order <- unique(metadata$dendrogram_group[leaf_order])
branch_colors <- metadata$dendrogram_color[
  match(groups_in_leaf_order, metadata$dendrogram_group)
]
dend <- color_branches(
  dend,
  k = length(groups_in_leaf_order),
  col = branch_colors
)

# Draw the dendrogram through dendextend's ggplot method so the branch
# colours set above are kept (ggdendro::ggdendrogram() does not draw them).
# Leaf labels are omitted: there is one leaf per sample row.
p1 <- ggplot(as.ggdend(dend), horiz = TRUE, labels = FALSE) +
  labs(title = "Dendrogram of Sézary Cell Lines (L1 and L2)") +
  theme(plot.title = element_text(hjust = 0.5))

# Order the tile rows by dendrogram leaf order so the side panel follows the
# tree instead of the alphabetical order of the sample names.
tile_data <- metadata %>%
  mutate(sample = factor(sample, levels = metadata$sample[leaf_order]))

# One fill colour per annotation group (the first colour seen for a group).
first_per_group <- !duplicated(metadata$annotation_group)
annotation_palette <- setNames(
  metadata$annotation_color[first_per_group],
  as.character(metadata$annotation_group[first_per_group])
)

# Create a heatmap-like plot of the groupings
p2 <- ggplot(tile_data, aes(x = 1, y = sample, fill = annotation_group)) +
  geom_tile() +
  scale_fill_manual(values = annotation_palette) +
  theme_minimal() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
  ) +
  labs(x = NULL, y = NULL, fill = "Annotation Group")

# Combine the plots (dendrogram three times as wide as the annotation strip)
combined_plot <- p1 + p2 + plot_layout(widths = c(3, 1))

# Display the plot in the notebook
combined_plot

library(ggplot2)
library(dplyr)
library(tidyr)
library(pheatmap)
library(RColorBrewer)
# Load the per-cell inferCNV groupings; row names come from the first column.
groupings <- read.table(
  "L1_L2_P1_inferCNV/infercnv.observation_groupings.txt",
  header = TRUE,
  stringsAsFactors = FALSE,
  row.names = 1
)

# Keep only the cells whose identifier starts with "L1_" or "L2_".
kept <- groupings[grepl("^L1_|^L2_", rownames(groupings)), , drop = FALSE]
kept_ids <- rownames(kept)

# Per-cell table: cell line from the identifier prefix, plus the dendrogram
# and annotation groups encoded as integers (factor level indices).
l1_l2_data <- data.frame(
  Cell_Line = ifelse(grepl("^L1_", kept_ids), "L1", "L2"),
  Dendrogram_Group = as.numeric(factor(kept[["Dendrogram.Group"]])),
  Annotation_Group = as.numeric(factor(kept[["Annotation.Group"]])),
  row.names = kept_ids,
  stringsAsFactors = FALSE
)

# Numeric matrix (cells x 2 group codes) that pheatmap will draw.
heatmap_matrix <- as.matrix(
  l1_l2_data[, c("Dendrogram_Group", "Annotation_Group")]
)
rownames(heatmap_matrix) <- rownames(l1_l2_data)

# Row annotation: the cell line of every cell, keyed by cell identifier.
cell_line_anno <- data.frame(
  Cell_Line = l1_l2_data$Cell_Line,
  row.names = rownames(l1_l2_data)
)

# One interpolated Set3 colour per distinct code present in the matrix.
n_codes <- length(unique(as.vector(heatmap_matrix)))
set3_ramp <- colorRampPalette(brewer.pal(8, "Set3"))
group_colors <- set3_ramp(n_codes)

# Draw the heatmap: rows are clustered, columns keep their given order.
pheatmap(
  heatmap_matrix,
  main = "Groupings of L1 and L2 Sézary Cell Lines",
  color = group_colors,
  cluster_rows = TRUE,
  cluster_cols = FALSE,
  show_rownames = FALSE,
  labels_col = c("Dendrogram Group", "Annotation Group"),
  annotation_row = cell_line_anno,
  annotation_colors = list(Cell_Line = c(L1 = "darkgreen", L2 = "purple"))
)

9. Save the Seurat object as an Robj file
#save(All_samples_Merged, file = "../5-SS_ScRNA_Data_Analysis/4-ScSS_MyAnalysis_on_SS/0-Important_R_OBJ/All_samples_Merged_WNN_correct_on_HPC.Robj")
---
title: "Phylogeny_based on inferCNV"
author: Nasir Mahmood Abbasi
date: "`r Sys.Date()`"
output:
  #rmdformats::readthedown
  html_notebook:
    toc: true
    toc_float: true
    toc_collapsed: true
---

# 1. load libraries
```{r setup, include=FALSE}
library(Seurat)
library(SeuratObject)
library(SeuratData)
library(patchwork)
library(harmony)
library(ggplot2)
library(cowplot)
library(reticulate)
library(Azimuth)
library(dplyr)
library(Rtsne)
library(harmony)
library(gridExtra)
library(ape)
library(pheatmap)

library(ggtree)
library(dplyr)
library(tidyr)
library(readr)
library(ggdendro)
library(dendextend)



```
# 2. Load Seurat Object 
```{r load_seurat}
# load("/home/bioinfo/Cluster_to_Computer_Transfer_files_folder/All_Normal-PBMC_Abnormal-cellLines_T_cells_Merged_Annotated_UMAP_on_Clusters_to_USE.Robj")

library(ape)
library(dendextend)
library(ggplot2)
library(dplyr)

# Read the per-cell inferCNV groupings. The first column becomes the row
# names; the code below treats those row names as cell identifiers.
groupings <- read.table(
  "L1_L2_P1_inferCNV/infercnv.observation_groupings.txt",
  header = TRUE, stringsAsFactors = FALSE, row.names = 1
)
# Fail early with a clear message instead of deep inside dist()/hclust().
stopifnot(
  nrow(groupings) >= 2,
  all(c("Dendrogram.Group", "Dendrogram.Color") %in% colnames(groupings))
)

# Encode each dendrogram group as an integer (the factor level index).
groupings$Dendrogram.Group_Numeric <-
  as.numeric(factor(groupings$Dendrogram.Group))

# Pairwise distances between the integer group codes. The vector is named so
# that the cell identifiers travel through dist() -> hclust() -> dendrogram
# and end up as tip labels in the Newick file (an unnamed vector would label
# the leaves 1..n instead).
# NOTE(review): this distance only reflects the ordering of the factor
# levels, and dist() needs memory quadratic in the number of cells.
dist_matrix <- dist(
  setNames(groupings$Dendrogram.Group_Numeric, rownames(groupings))
)

# Hierarchical clustering (hclust defaults) converted to a dendrogram.
dend <- as.dendrogram(hclust(dist_matrix))

# color_branches() applies `col` to the k clusters in left-to-right leaf
# order, so the colours are looked up in that order rather than in file
# order. Cells of one group are at distance 0 from each other and therefore
# form one contiguous run of leaves.
leaf_groups <- groupings$Dendrogram.Group[order.dendrogram(dend)]
groups_in_leaf_order <- unique(leaf_groups)
unique_colors <- groupings$Dendrogram.Color[
  match(groups_in_leaf_order, groupings$Dendrogram.Group)
]
dend <- color_branches(
  dend,
  k = length(groups_in_leaf_order),
  col = unique_colors
)

# Convert to Newick format and write to file.
newick <- write.tree(as.phylo(dend))
write(newick, file = "infercnv.observations_dendrogram.txt")

# Plot the dendrogram. Leaf labels are suppressed: there is one leaf per
# cell, and the labels overlap when there are many cells.
plot(dend, main = "Dendrogram of Cells", leaflab = "none")

# Assign each cell to a cell line from the prefix of its identifier.
cell_ids <- rownames(groupings)
cell_data <- data.frame(
  cell = cell_ids,
  cell_line = case_when(
    startsWith(cell_ids, "L1_") ~ "L1",
    startsWith(cell_ids, "L2_") ~ "L2",
    TRUE ~ "Other"
  )
)

# One tile per cell, filled by cell line.
p <- ggplot(cell_data, aes(x = 1, y = cell, fill = cell_line)) +
  geom_tile() +
  scale_fill_manual(
    values = c("L1" = "darkgreen", "L2" = "purple", "Other" = "gray")
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
  ) +
  labs(x = NULL, y = NULL, fill = "Cell Line", title = "Cell Line Distribution")

# Display the plot
print(p)

# Print the number of cells per cell line.
cat("Number of cells in L1:", sum(cell_data$cell_line == "L1"), "\n")
cat("Number of cells in L2:", sum(cell_data$cell_line == "L2"), "\n")
cat("Number of other cells:", sum(cell_data$cell_line == "Other"), "\n")

# Print the number of unique groups
cat(
  "Number of unique Dendrogram Groups:",
  length(unique(groupings$Dendrogram.Group)),
  "\n"
)

```


# 3. Phylogeny of inferCNV
```{r phylogeny_inferCNV_L1L2, fig.height=8, fig.width=12}

library(ggplot2)
library(dplyr)
library(tidyr)
library(ggdendro)
library(dendextend)
library(patchwork)
library(tibble)

# Read the heatmap thresholds (first column of the file).
# NOTE(review): `thresholds` is not used by the rest of this chunk.
thresholds <- read.table(
  "L1_L2_P1_inferCNV/infercnv.heatmap_thresholds.txt",
  header = FALSE
)$V1

# Read the per-cell inferCNV groupings; row names come from the first column.
groupings <- read.table(
  "L1_L2_P1_inferCNV/infercnv.observation_groupings.txt",
  header = TRUE, stringsAsFactors = FALSE, row.names = 1
)

# Tidy the groupings: row names become a `sample` column, columns get
# snake_case names, a patient label is derived from the sample prefix, and
# the dendrogram group is encoded as an integer (factor level index).
metadata <- groupings %>%
  rownames_to_column("sample") %>%
  rename(
    dendrogram_group = Dendrogram.Group,
    dendrogram_color = Dendrogram.Color,
    annotation_group = Annotation.Group,
    annotation_color = Annotation.Color
  ) %>%
  mutate(
    patient = case_when(
      grepl("^L1_|^L2_", sample) ~ "P1",
      grepl("^L3_|^L4_", sample) ~ "P2",
      grepl("^L5_|^L6_|^L7_", sample) ~ "P3",
      TRUE ~ NA_character_
    ),
    annotation_group = as.factor(annotation_group),
    dendrogram_group = as.numeric(as.factor(dendrogram_group))
  ) %>%
  filter(!is.na(dendrogram_group))

# hclust() needs at least two observations.
stopifnot(nrow(metadata) >= 2)

# Distances between the integer group codes; the vector is named so the
# dendrogram leaves carry the sample identifiers instead of 1..n.
dist_matrix <- dist(setNames(metadata$dendrogram_group, metadata$sample))

# Create a dendrogram
dend <- as.dendrogram(hclust(dist_matrix))

# color_branches() applies `col` to the k clusters in left-to-right leaf
# order, so the colours are looked up in that order rather than in file
# order.
leaf_order <- order.dendrogram(dend)
groups_in_leaf_order <- unique(metadata$dendrogram_group[leaf_order])
branch_colors <- metadata$dendrogram_color[
  match(groups_in_leaf_order, metadata$dendrogram_group)
]
dend <- color_branches(
  dend,
  k = length(groups_in_leaf_order),
  col = branch_colors
)

# Draw the dendrogram through dendextend's ggplot method so the branch
# colours set above are kept (ggdendro::ggdendrogram() does not draw them).
# Leaf labels are omitted: there is one leaf per sample row.
p1 <- ggplot(as.ggdend(dend), horiz = TRUE, labels = FALSE) +
  labs(title = "Dendrogram of Sézary Cell Lines (L1 and L2)") +
  theme(plot.title = element_text(hjust = 0.5))

# Order the tile rows by dendrogram leaf order so the side panel follows the
# tree instead of the alphabetical order of the sample names.
tile_data <- metadata %>%
  mutate(sample = factor(sample, levels = metadata$sample[leaf_order]))

# One fill colour per annotation group (the first colour seen for a group).
first_per_group <- !duplicated(metadata$annotation_group)
annotation_palette <- setNames(
  metadata$annotation_color[first_per_group],
  as.character(metadata$annotation_group[first_per_group])
)

# Create a heatmap-like plot of the groupings
p2 <- ggplot(tile_data, aes(x = 1, y = sample, fill = annotation_group)) +
  geom_tile() +
  scale_fill_manual(values = annotation_palette) +
  theme_minimal() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
  ) +
  labs(x = NULL, y = NULL, fill = "Annotation Group")

# Combine the plots (dendrogram three times as wide as the annotation strip)
combined_plot <- p1 + p2 + plot_layout(widths = c(3, 1))

# Display the plot in the notebook
combined_plot
```



```{r phylogeny_Seurat2, fig.height=8, fig.width=12}

library(ggplot2)
library(dplyr)
library(tidyr)
library(pheatmap)
library(RColorBrewer)

# Load the per-cell inferCNV groupings; row names come from the first column.
groupings <- read.table(
  "L1_L2_P1_inferCNV/infercnv.observation_groupings.txt",
  header = TRUE,
  stringsAsFactors = FALSE,
  row.names = 1
)

# Keep only the cells whose identifier starts with "L1_" or "L2_".
kept <- groupings[grepl("^L1_|^L2_", rownames(groupings)), , drop = FALSE]
kept_ids <- rownames(kept)

# Per-cell table: cell line from the identifier prefix, plus the dendrogram
# and annotation groups encoded as integers (factor level indices).
l1_l2_data <- data.frame(
  Cell_Line = ifelse(grepl("^L1_", kept_ids), "L1", "L2"),
  Dendrogram_Group = as.numeric(factor(kept[["Dendrogram.Group"]])),
  Annotation_Group = as.numeric(factor(kept[["Annotation.Group"]])),
  row.names = kept_ids,
  stringsAsFactors = FALSE
)

# Numeric matrix (cells x 2 group codes) that pheatmap will draw.
heatmap_matrix <- as.matrix(
  l1_l2_data[, c("Dendrogram_Group", "Annotation_Group")]
)
rownames(heatmap_matrix) <- rownames(l1_l2_data)

# Row annotation: the cell line of every cell, keyed by cell identifier.
cell_line_anno <- data.frame(
  Cell_Line = l1_l2_data$Cell_Line,
  row.names = rownames(l1_l2_data)
)

# One interpolated Set3 colour per distinct code present in the matrix.
n_codes <- length(unique(as.vector(heatmap_matrix)))
set3_ramp <- colorRampPalette(brewer.pal(8, "Set3"))
group_colors <- set3_ramp(n_codes)

# Draw the heatmap: rows are clustered, columns keep their given order.
pheatmap(
  heatmap_matrix,
  main = "Groupings of L1 and L2 Sézary Cell Lines",
  color = group_colors,
  cluster_rows = TRUE,
  cluster_cols = FALSE,
  show_rownames = FALSE,
  labels_col = c("Dendrogram Group", "Annotation Group"),
  annotation_row = cell_line_anno,
  annotation_colors = list(Cell_Line = c(L1 = "darkgreen", L2 = "purple"))
)
```

# 9. Save the Seurat object as an Robj file
```{r saveROBJ}

#save(All_samples_Merged, file = "../5-SS_ScRNA_Data_Analysis/4-ScSS_MyAnalysis_on_SS/0-Important_R_OBJ/All_samples_Merged_WNN_correct_on_HPC.Robj")


```




