1. load libraries
2. Load Seurat Object
load("/home/bioinfo/Cluster_to_Computer_Transfer_files_folder/All_Normal-PBMC_Abnormal-cellLines_T_cells_Merged_Annotated_UMAP_on_Clusters_to_USE.Robj")
3. Phylogeney of RNA based Expression-eucleadian
DefaultAssay(All_samples_Merged) <- 'SCT'
# 1. Calculate average expression for each gene in each cell line
avg_expression <- AggregateExpression(All_samples_Merged,
group.by = "cell_line",
assays = "SCT",
slot = "data",
return.seurat = FALSE)$SCT
# 2. Select top variable genes
var_genes <- VariableFeatures(All_samples_Merged)
avg_expression_var <- avg_expression[var_genes,]
# Convert to matrix and handle potential issues
avg_expression_mat <- as.matrix(avg_expression_var)
print(paste("Dimensions of expression matrix:", paste(dim(avg_expression_mat), collapse = "x")))
[1] "Dimensions of expression matrix: 3000x8"
# Remove any genes with zero variance
gene_var <- apply(avg_expression_mat, 1, var)
avg_expression_mat <- avg_expression_mat[gene_var > 0, ]
print(paste("Dimensions after removing zero variance genes:", paste(dim(avg_expression_mat), collapse = "x")))
[1] "Dimensions after removing zero variance genes: 3000x8"
# 3. Calculate distance matrix
dist_matrix <- dist(t(avg_expression_mat))
Found more than one class "dist" in cache; using the first, from namespace 'spam'
Also defined by 'BiocGenerics'
# Check distance matrix
print(paste("Any NA in distance matrix:", any(is.na(dist_matrix))))
[1] "Any NA in distance matrix: FALSE"
print(paste("Any infinite values in distance matrix:", any(is.infinite(dist_matrix))))
[1] "Any infinite values in distance matrix: FALSE"
print("Summary of distance values:")
[1] "Summary of distance values:"
print(summary(as.vector(dist_matrix)))
Min. 1st Qu. Median Mean 3rd Qu. Max.
592041 1136261 1515296 1702792 2290386 2801467
# 4. Create phylogenetic tree
if (any(is.na(dist_matrix))) {
print("Distance matrix contains NA values, cannot create phylogenetic tree")
# Create a simple dendrogram instead
hc <- hclust(dist_matrix)
plot(hc, main="Dendrogram of Cell Lines Based on Gene Expression")
} else {
tree <- nj(dist_matrix)
plot(tree, main="Phylogenetic Tree of Cell Lines Based on Gene Expression")
}

# 5. Create a heatmap
pheatmap(avg_expression_mat,
cluster_rows = TRUE,
cluster_cols = TRUE,
show_rownames = FALSE,
main = "Heatmap of Top Variable Genes Across Cell Lines")

NA
NA
4. Phylogeney of RNA based Expression-Pearson
# Check if SCT assay exists and perform SCTransform if needed
DefaultAssay(All_samples_Merged) <- 'SCT'
# 1. Verify that cell_line metadata exists
if (!"cell_line" %in% colnames(All_samples_Merged@meta.data)) {
stop("cell_line column not found in metadata. Ensure each cell has a cell line label.")
}
# 2. Calculate average expression for each gene in each cell line
avg_expression <- AggregateExpression(All_samples_Merged,
group.by = "cell_line",
assays = "SCT",
slot = "data",
return.seurat = FALSE)$SCT
# Ensure that the aggregation is correct
print(paste("Number of genes:", nrow(avg_expression)))
[1] "Number of genes: 25901"
print(paste("Number of cell lines:", ncol(avg_expression)))
[1] "Number of cell lines: 8"
# 3. Select top variable genes for phylogenetic analysis
var_genes <- VariableFeatures(All_samples_Merged)
avg_expression_var <- avg_expression[var_genes, ]
# Convert to matrix: genes as rows, cell lines as columns
avg_expression_mat <- as.matrix(avg_expression_var)
print(paste("Dimensions of expression matrix:", paste(dim(avg_expression_mat), collapse = "x")))
[1] "Dimensions of expression matrix: 3000x8"
# Remove genes with zero variance
gene_var <- apply(avg_expression_mat, 1, var)
avg_expression_mat <- avg_expression_mat[gene_var > 0, ]
print(paste("Dimensions after removing zero variance genes:", paste(dim(avg_expression_mat), collapse = "x")))
[1] "Dimensions after removing zero variance genes: 3000x8"
# 4. Calculate Pearson correlation distance between **cell lines**
# Transpose the matrix so that cell lines are compared
cor_matrix <- cor(avg_expression_mat, method = "pearson")
cor_dist <- as.dist(1 - cor_matrix)
Found more than one class "dist" in cache; using the first, from namespace 'spam'
Also defined by 'BiocGenerics'
# Check distance matrix
print(paste("Any NA in distance matrix:", any(is.na(cor_dist))))
[1] "Any NA in distance matrix: FALSE"
print(paste("Any infinite values in distance matrix:", any(is.infinite(cor_dist))))
[1] "Any infinite values in distance matrix: FALSE"
print("Summary of distance values:")
[1] "Summary of distance values:"
print(summary(as.vector(cor_dist)))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.02325 0.08369 0.13251 0.13627 0.19588 0.24904
# 5. Create phylogenetic tree using NJ or dendrogram as fallback
library(ape) # Ensure ape is loaded for nj()
if (any(is.na(cor_dist))) {
print("Distance matrix contains NA values, cannot create phylogenetic tree")
# Create a dendrogram as fallback
hc <- hclust(cor_dist)
plot(hc, main="Dendrogram of Cell Lines Based on Correlation Distance")
} else {
tree <- nj(cor_dist)
plot(tree, main="Phylogenetic Tree of Cell Lines Based on Correlation Distance")
}

# 6. Create heatmap for visualization (ensure it clusters cell lines)
library(pheatmap)
pheatmap(avg_expression_mat,
cluster_rows = FALSE, # Do not cluster genes
cluster_cols = TRUE, # Cluster cell lines (columns)
show_rownames = FALSE,
main = "Heatmap of Top Variable Genes Across Cell Lines")

NA
NA
6. Save the Seurat object as an Robj file
#save(All_samples_Merged, file = "../5-SS_ScRNA_Data_Analysis/4-ScSS_MyAnalysis_on_SS/0-Important_R_OBJ/All_samples_Merged_WNN_correct_on_HPC.Robj")
LS0tCnRpdGxlOiAiUGh5bG9nZW55X2Jhc2VkIG9uIFJOQSBFeHByZXNzaW9uIgphdXRob3I6IE5hc2lyIE1haG1vb2QgQWJiYXNpCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCIKb3V0cHV0OgogICNybWRmb3JtYXRzOjpyZWFkdGhlZG93bgogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDogdHJ1ZQogICAgdG9jX2NvbGxhcHNlZDogdHJ1ZQotLS0KCiMgMS4gbG9hZCBsaWJyYXJpZXMKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkoU2V1cmF0KQpsaWJyYXJ5KFNldXJhdE9iamVjdCkKbGlicmFyeShTZXVyYXREYXRhKQpsaWJyYXJ5KHBhdGNod29yaykKbGlicmFyeShoYXJtb255KQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoY293cGxvdCkKbGlicmFyeShyZXRpY3VsYXRlKQpsaWJyYXJ5KEF6aW11dGgpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoUnRzbmUpCmxpYnJhcnkoaGFybW9ueSkKbGlicmFyeShncmlkRXh0cmEpCmxpYnJhcnkoYXBlKQpsaWJyYXJ5KHBoZWF0bWFwKQoKYGBgCiMgMi4gTG9hZCBTZXVyYXQgT2JqZWN0IApgYGB7ciBsb2FkX3NldXJhdH0KbG9hZCgiL2hvbWUvYmlvaW5mby9DbHVzdGVyX3RvX0NvbXB1dGVyX1RyYW5zZmVyX2ZpbGVzX2ZvbGRlci9BbGxfTm9ybWFsLVBCTUNfQWJub3JtYWwtY2VsbExpbmVzX1RfY2VsbHNfTWVyZ2VkX0Fubm90YXRlZF9VTUFQX29uX0NsdXN0ZXJzX3RvX1VTRS5Sb2JqIikKCmBgYAoKCiMgMy4gUGh5bG9nZW5leSBvZiBSTkEgYmFzZWQgRXhwcmVzc2lvbi1ldWNsZWFkaWFuCmBgYHtyIHBoeWxvZ2VueV9STkEsIGZpZy5oZWlnaHQ9OCwgZmlnLndpZHRoPTEyfQoKRGVmYXVsdEFzc2F5KEFsbF9zYW1wbGVzX01lcmdlZCkgPC0gJ1NDVCcKCiMgMS4gQ2FsY3VsYXRlIGF2ZXJhZ2UgZXhwcmVzc2lvbiBmb3IgZWFjaCBnZW5lIGluIGVhY2ggY2VsbCBsaW5lCmF2Z19leHByZXNzaW9uIDwtIEFnZ3JlZ2F0ZUV4cHJlc3Npb24oQWxsX3NhbXBsZXNfTWVyZ2VkLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBncm91cC5ieSA9ICJjZWxsX2xpbmUiLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBhc3NheXMgPSAiU0NUIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc2xvdCA9ICJkYXRhIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICByZXR1cm4uc2V1cmF0ID0gRkFMU0UpJFNDVAoKIyAyLiBTZWxlY3QgdG9wIHZhcmlhYmxlIGdlbmVzCnZhcl9nZW5lcyA8LSBWYXJpYWJsZUZlYXR1cmVzKEFsbF9zYW1wbGVzX01lcmdlZCkKYXZnX2V4cHJlc3Npb25fdmFyIDwtIGF2Z19leHByZXNzaW9uW3Zhcl9nZW5lcyxdCgojIENvbnZlcnQgdG8gbWF0cml4IGFuZCBoYW5kbGUgcG90ZW50aWFsIGlzc3VlcwphdmdfZXhwcmVzc2lvbl9tYXQgPC0gYXMubWF0cml4KGF2Z19leHByZXNzaW9uX3ZhcikKcHJpbnQocGFzdGUoIkRpbWVuc2lvbnMgb2YgZXhwcmVzc2lvbiBtYXRyaXg6IiwgcGFzdGUoZGltKGF2Z19leHByZXNzaW9uX21hdCksIGNvbGxhcHNlID0gIngiKSkpCgojIFJlbW92ZSBhbnkgZ2VuZXMgd2l0aCB6ZXJvIHZhcmlhbmNlCmdlbmVfdmFyIDwtIGFwcGx5KGF2Z19leHByZXNzaW9uX21hdCwgMSwgdmFyKQphdmdfZXhwcmVzc2lvbl9tYXQgPC0gYXZnX2V4cHJlc3Npb25fbWF0W2dlbmVfdmFyID4gMCwgXQpwcmludChwYXN0ZSgiRGltZW5zaW9ucyBhZnRlciByZW1vdmluZyB6ZXJvIHZhcmlhbmNlIGdlbmVzOiIsIHBhc3RlKGRpbShhdmdfZXhwcmVzc2lvbl9tYXQpLCBjb2xsYXBzZSA9ICJ4IikpKQoKIyAzLiBDYWxjdWxhdGUgZGlzdGFuY2UgbWF0cml4CmRpc3RfbWF0cml4IDwtIGRpc3QodChhdmdfZXhwcmVzc2lvbl9tYXQpKQoKIyBDaGVjayBkaXN0YW5jZSBtYXRyaXgKcHJpbnQocGFzdGUoIkFueSBOQSBpbiBkaXN0YW5jZSBtYXRyaXg6IiwgYW55KGlzLm5hKGRpc3RfbWF0cml4KSkpKQpwcmludChwYXN0ZSgiQW55IGluZmluaXRlIHZhbHVlcyBpbiBkaXN0YW5jZSBtYXRyaXg6IiwgYW55KGlzLmluZmluaXRlKGRpc3RfbWF0cml4KSkpKQpwcmludCgiU3VtbWFyeSBvZiBkaXN0YW5jZSB2YWx1ZXM6IikKcHJpbnQoc3VtbWFyeShhcy52ZWN0b3IoZGlzdF9tYXRyaXgpKSkKCiMgNC4gQ3JlYXRlIHBoeWxvZ2VuZXRpYyB0cmVlCmlmIChhbnkoaXMubmEoZGlzdF9tYXRyaXgpKSkgewogIHByaW50KCJEaXN0YW5jZSBtYXRyaXggY29udGFpbnMgTkEgdmFsdWVzLCBjYW5ub3QgY3JlYXRlIHBoeWxvZ2VuZXRpYyB0cmVlIikKICAjIENyZWF0ZSBhIHNpbXBsZSBkZW5kcm9ncmFtIGluc3RlYWQKICBoYyA8LSBoY2x1c3QoZGlzdF9tYXRyaXgpCiAgcGxvdChoYywgbWFpbj0iRGVuZHJvZ3JhbSBvZiBDZWxsIExpbmVzIEJhc2VkIG9uIEdlbmUgRXhwcmVzc2lvbiIpCn0gZWxzZSB7CiAgdHJlZSA8LSBuaihkaXN0X21hdHJpeCkKICBwbG90KHRyZWUsIG1haW49IlBoeWxvZ2VuZXRpYyBUcmVlIG9mIENlbGwgTGluZXMgQmFzZWQgb24gR2VuZSBFeHByZXNzaW9uIikKfQoKIyA1LiBDcmVhdGUgYSBoZWF0bWFwCnBoZWF0bWFwKGF2Z19leHByZXNzaW9uX21hdCwgCiAgICAgICAgIGNsdXN0ZXJfcm93cyA9IFRSVUUsIAogICAgICAgICBjbHVzdGVyX2NvbHMgPSBUUlVFLCAKICAgICAgICAgc2hvd19yb3duYW1lcyA9IEZBTFNFLAogICAgICAgICBtYWluID0gIkhlYXRtYXAgb2YgVG9wIFZhcmlhYmxlIEdlbmVzIEFjcm9zcyBDZWxsIExpbmVzIikKCgpgYGAKIyA0LiBQaHlsb2dlbmV5IG9mIFJOQSBiYXNlZCBFeHByZXNzaW9uLVBlYXJzb24KYGBge3IgcGh5bG9nZW55X1BlYXJzb24sIGZpZy5oZWlnaHQ9OCwgZmlnLndpZHRoPTEyfQoKIyBDaGVjayBpZiBTQ1QgYXNzYXkgZXhpc3RzIGFuZCBwZXJmb3JtIFNDVHJhbnNmb3JtIGlmIG5lZWRlZApEZWZhdWx0QXNzYXkoQWxsX3NhbXBsZXNfTWVyZ2VkKSA8LSAnU0NUJwoKCiMgMS4gVmVyaWZ5IHRoYXQgY2VsbF9saW5lIG1ldGFkYXRhIGV4aXN0cwppZiAoISJjZWxsX2xpbmUiICVpbiUgY29sbmFtZXMoQWxsX3NhbXBsZXNfTWVyZ2VkQG1ldGEuZGF0YSkpIHsKICBzdG9wKCJjZWxsX2xpbmUgY29sdW1uIG5vdCBmb3VuZCBpbiBtZXRhZGF0YS4gRW5zdXJlIGVhY2ggY2VsbCBoYXMgYSBjZWxsIGxpbmUgbGFiZWwuIikKfQoKIyAyLiBDYWxjdWxhdGUgYXZlcmFnZSBleHByZXNzaW9uIGZvciBlYWNoIGdlbmUgaW4gZWFjaCBjZWxsIGxpbmUKYXZnX2V4cHJlc3Npb24gPC0gQWdncmVnYXRlRXhwcmVzc2lvbihBbGxfc2FtcGxlc19NZXJnZWQsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGdyb3VwLmJ5ID0gImNlbGxfbGluZSIsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGFzc2F5cyA9ICJTQ1QiLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzbG90ID0gImRhdGEiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHJldHVybi5zZXVyYXQgPSBGQUxTRSkkU0NUCgojIEVuc3VyZSB0aGF0IHRoZSBhZ2dyZWdhdGlvbiBpcyBjb3JyZWN0CnByaW50KHBhc3RlKCJOdW1iZXIgb2YgZ2VuZXM6IiwgbnJvdyhhdmdfZXhwcmVzc2lvbikpKQpwcmludChwYXN0ZSgiTnVtYmVyIG9mIGNlbGwgbGluZXM6IiwgbmNvbChhdmdfZXhwcmVzc2lvbikpKQoKIyAzLiBTZWxlY3QgdG9wIHZhcmlhYmxlIGdlbmVzIGZvciBwaHlsb2dlbmV0aWMgYW5hbHlzaXMKdmFyX2dlbmVzIDwtIFZhcmlhYmxlRmVhdHVyZXMoQWxsX3NhbXBsZXNfTWVyZ2VkKQphdmdfZXhwcmVzc2lvbl92YXIgPC0gYXZnX2V4cHJlc3Npb25bdmFyX2dlbmVzLCBdCgojIENvbnZlcnQgdG8gbWF0cml4OiBnZW5lcyBhcyByb3dzLCBjZWxsIGxpbmVzIGFzIGNvbHVtbnMKYXZnX2V4cHJlc3Npb25fbWF0IDwtIGFzLm1hdHJpeChhdmdfZXhwcmVzc2lvbl92YXIpCnByaW50KHBhc3RlKCJEaW1lbnNpb25zIG9mIGV4cHJlc3Npb24gbWF0cml4OiIsIHBhc3RlKGRpbShhdmdfZXhwcmVzc2lvbl9tYXQpLCBjb2xsYXBzZSA9ICJ4IikpKQoKIyBSZW1vdmUgZ2VuZXMgd2l0aCB6ZXJvIHZhcmlhbmNlCmdlbmVfdmFyIDwtIGFwcGx5KGF2Z19leHByZXNzaW9uX21hdCwgMSwgdmFyKQphdmdfZXhwcmVzc2lvbl9tYXQgPC0gYXZnX2V4cHJlc3Npb25fbWF0W2dlbmVfdmFyID4gMCwgXQpwcmludChwYXN0ZSgiRGltZW5zaW9ucyBhZnRlciByZW1vdmluZyB6ZXJvIHZhcmlhbmNlIGdlbmVzOiIsIHBhc3RlKGRpbShhdmdfZXhwcmVzc2lvbl9tYXQpLCBjb2xsYXBzZSA9ICJ4IikpKQoKIyA0LiBDYWxjdWxhdGUgUGVhcnNvbiBjb3JyZWxhdGlvbiBkaXN0YW5jZSBiZXR3ZWVuICoqY2VsbCBsaW5lcyoqCiMgVHJhbnNwb3NlIHRoZSBtYXRyaXggc28gdGhhdCBjZWxsIGxpbmVzIGFyZSBjb21wYXJlZApjb3JfbWF0cml4IDwtIGNvcihhdmdfZXhwcmVzc2lvbl9tYXQsIG1ldGhvZCA9ICJwZWFyc29uIikKY29yX2Rpc3QgPC0gYXMuZGlzdCgxIC0gY29yX21hdHJpeCkKCiMgQ2hlY2sgZGlzdGFuY2UgbWF0cml4CnByaW50KHBhc3RlKCJBbnkgTkEgaW4gZGlzdGFuY2UgbWF0cml4OiIsIGFueShpcy5uYShjb3JfZGlzdCkpKSkKcHJpbnQocGFzdGUoIkFueSBpbmZpbml0ZSB2YWx1ZXMgaW4gZGlzdGFuY2UgbWF0cml4OiIsIGFueShpcy5pbmZpbml0ZShjb3JfZGlzdCkpKSkKcHJpbnQoIlN1bW1hcnkgb2YgZGlzdGFuY2UgdmFsdWVzOiIpCnByaW50KHN1bW1hcnkoYXMudmVjdG9yKGNvcl9kaXN0KSkpCgojIDUuIENyZWF0ZSBwaHlsb2dlbmV0aWMgdHJlZSB1c2luZyBOSiBvciBkZW5kcm9ncmFtIGFzIGZhbGxiYWNrCmxpYnJhcnkoYXBlKSAgIyBFbnN1cmUgYXBlIGlzIGxvYWRlZCBmb3IgbmooKQppZiAoYW55KGlzLm5hKGNvcl9kaXN0KSkpIHsKICBwcmludCgiRGlzdGFuY2UgbWF0cml4IGNvbnRhaW5zIE5BIHZhbHVlcywgY2Fubm90IGNyZWF0ZSBwaHlsb2dlbmV0aWMgdHJlZSIpCiAgIyBDcmVhdGUgYSBkZW5kcm9ncmFtIGFzIGZhbGxiYWNrCiAgaGMgPC0gaGNsdXN0KGNvcl9kaXN0KQogIHBsb3QoaGMsIG1haW49IkRlbmRyb2dyYW0gb2YgQ2VsbCBMaW5lcyBCYXNlZCBvbiBDb3JyZWxhdGlvbiBEaXN0YW5jZSIpCn0gZWxzZSB7CiAgdHJlZSA8LSBuaihjb3JfZGlzdCkKICBwbG90KHRyZWUsIG1haW49IlBoeWxvZ2VuZXRpYyBUcmVlIG9mIENlbGwgTGluZXMgQmFzZWQgb24gQ29ycmVsYXRpb24gRGlzdGFuY2UiKQp9CgojIDYuIENyZWF0ZSBoZWF0bWFwIGZvciB2aXN1YWxpemF0aW9uIChlbnN1cmUgaXQgY2x1c3RlcnMgY2VsbCBsaW5lcykKbGlicmFyeShwaGVhdG1hcCkKcGhlYXRtYXAoYXZnX2V4cHJlc3Npb25fbWF0LCAKICAgICAgICAgY2x1c3Rlcl9yb3dzID0gRkFMU0UsICAgICMgRG8gbm90IGNsdXN0ZXIgZ2VuZXMKICAgICAgICAgY2x1c3Rlcl9jb2xzID0gVFJVRSwgICAgICMgQ2x1c3RlciBjZWxsIGxpbmVzIChjb2x1bW5zKQogICAgICAgICBzaG93X3Jvd25hbWVzID0gRkFMU0UsCiAgICAgICAgIG1haW4gPSAiSGVhdG1hcCBvZiBUb3AgVmFyaWFibGUgR2VuZXMgQWNyb3NzIENlbGwgTGluZXMiKQoKCmBgYAoKCiMgNi4gU2F2ZSB0aGUgU2V1cmF0IG9iamVjdCBhcyBhbiBSb2JqIGZpbGUKYGBge3Igc2F2ZVJPQkp9Cgojc2F2ZShBbGxfc2FtcGxlc19NZXJnZWQsIGZpbGUgPSAiLi4vNS1TU19TY1JOQV9EYXRhX0FuYWx5c2lzLzQtU2NTU19NeUFuYWx5c2lzX29uX1NTLzAtSW1wb3J0YW50X1JfT0JKL0FsbF9zYW1wbGVzX01lcmdlZF9XTk5fY29ycmVjdF9vbl9IUEMuUm9iaiIpCgoKYGBgCgoKCgo=