# Differential Expression Analysis
# Load the saved Seurat object (original note: "L7").
# load() restores the saved object(s) into the current environment under the
# names they were saved with. The commented-out export code below refers to the
# object as `All_samples_Merged` - presumably that is the name restored here
# (TODO confirm).
load("/home/bioinfo/0-imp_Robj/Harmony_integrated_All_samples_Merged_with_PBMC10x_with_harmony_clustering.Robj")
# sct_data <- GetAssayData(All_samples_Merged, assay = "SCT", layer = "data")
# memory.limit(size = 64000)
#
# # Transpose the data so that cells are rows and genes are columns
# transposed_data <- t(as.data.frame(sct_data))
#
# # Specify the file name and save as CSV
# write.csv(transposed_data, file = "table/SCT_data_All_samples_Merged_transposed.csv", row.names = TRUE)
#
#
#
#
#
# # Extract metadata from Seurat object
# metadata <- All_samples_Merged@meta.data
#
# # Write metadata to CSV
# write.csv(metadata, file = "Extra/Metadata_All_samples_Merged.csv", row.names = TRUE)
library(dplyr)
# Load your CSV file
# data <- read.csv("Extra/NewFiles/pbmc_METADATA.csv")
#
# # Filter rows where all three predicted columns contain "CD4 T"
# filtered_data <- data %>%
# filter(grepl("CD4 T", predicted.celltype.l1) &
# grepl("CD4 T", predicted.celltype.l2) &
# grepl("CD4 T", predicted.celltype.l3))
#
# # Write the filtered data to a new CSV file, including the header
# write.csv(filtered_data, "CD4Tcells_PBMC_Control.csv", row.names = FALSE)
#
# # Save the first column (PBMC cells) to a txt file without header
# write.table(filtered_data[, 1], "Extra/NewFiles/PBMC_CD4T_cells.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
#
# # Load your CSV file
# data <- read.csv("Extra/NewFiles/Cell_lines/Metadata_All_cell_lines.csv")
#
#
# # Define the clusters of interest
# clusters_of_interest <- c(3, 8, 10, 18, 1, 2, 13, 4, 7, 9, 6, 16, 19)
#
# # Filter cells based on the specified clusters
# filtered_data <- data %>%
# filter(Harmony_snn_res.0.9 %in% clusters_of_interest)
#
# # Write the filtered data to a new CSV file
# write.csv(filtered_data, "Extra/NewFiles/Cell_lines/filtered_cells_of_cell_lines_by_cluster.csv", row.names = FALSE)
#
# # Save the first column (PBMC cells) without the header to a txt file
# write.table(filtered_data[, 1], "Extra/NewFiles/Cell_lines/All_cell_lines_cells.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
#
# # Load your CSV file
# data <- read.csv("Extra/NewFiles/Cell_lines/Metadata_All_cell_lines.csv")
#
# # To save P1 (L1+L2)
# # Define the clusters of interest
# clusters_of_interest <- c(3, 8, 10, 18)
#
# # Filter cells based on the specified clusters
# filtered_data <- data %>%
# filter(Harmony_snn_res.0.9 %in% clusters_of_interest)
#
# # Write the filtered data to a new CSV file
# write.csv(filtered_data, "Extra/NewFiles/Patients_based_on_celllines(Clusters)/P1.csv", row.names = FALSE)
#
# # Save the first column (PBMC cells) without the header to a txt file
# write.table(filtered_data[, 1], "Extra/NewFiles/Patients_based_on_celllines(Clusters)/P1.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
#
#
# # To save P2 (L3+L4)
# # Define the clusters of interest
# clusters_of_interest <- c(1, 2, 13)
#
# # Filter cells based on the specified clusters
# filtered_data <- data %>%
# filter(Harmony_snn_res.0.9 %in% clusters_of_interest)
#
# # Write the filtered data to a new CSV file
# write.csv(filtered_data, "Extra/NewFiles/Patients_based_on_celllines(Clusters)/P2.csv", row.names = FALSE)
#
# # Save the first column (PBMC cells) without the header to a txt file
# write.table(filtered_data[, 1], "Extra/NewFiles/Patients_based_on_celllines(Clusters)/P2.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
#
#
#
# # To save P3 (L5+L6+L7)
# # Define the clusters of interest
# clusters_of_interest <- c(4, 7, 9, 6, 16, 19)
#
# # Filter cells based on the specified clusters
# filtered_data <- data %>%
# filter(Harmony_snn_res.0.9 %in% clusters_of_interest)
#
# # Write the filtered data to a new CSV file
# write.csv(filtered_data, "Extra/NewFiles/Patients_based_on_celllines(Clusters)/P3.csv", row.names = FALSE)
#
# # Save the first column (PBMC cells) without the header to a txt file
# write.table(filtered_data[, 1], "Extra/NewFiles/Patients_based_on_celllines(Clusters)/P3.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
# Differential Expression Analysis
# foreach/doParallel are attached before sourcing the scanner script below -
# presumably because that script relies on them (TODO confirm).
library(foreach)
library(doParallel)
# Relative paths used after this point resolve against this directory.
# NOTE(review): later sections of this file call setwd() with
# "/home/bioinfo/17-SingleCellFCscanner/" instead - confirm which location is
# the current one.
setwd("/isilon/homes/nabbasi/6-DE/17-SingleCellFCscanner/")
# Evaluate the scFCscanner script located in the working directory set above.
source("scFCscanner_028_in_list.r")
# #load libraries------------------------------------
# library(dplyr)
# library(tidyverse)
#
# # Read the TSV file into R
# Exp_Allsample <- read_tsv("Extra/NewFiles/Results/Cell_lines_vs_CD4Tcells/SCT_All_cell_lines_cells_vs_PBMC_CD4T_cells.tsv")
#
# # Calculate log-fold change using FC column in the file
# Exp_Allsample$log2FC <- log2(Exp_Allsample$FC_All_cell_lines_cells_PBMC_CD4T_cells )
#
# # Filter rows based on logFC criteria
# filtered_data <- Exp_Allsample %>% filter(log2FC > 3 | log2FC < -1)
#
# # Exclude rows with "L1-L7 mean" and "mean control" both less than 0.2
# filtered_data_final <- filtered_data %>% filter(!(mean_All_cell_lines_cells < 0.2 & mean_PBMC_CD4T_cells < 0.2))
#
# # Writing it to CSV file
# write.csv(filtered_data_final, "Extra/NewFiles/Results/Cell_lines_vs_CD4Tcells/filtered_data_Cell_lines_vs_CD4Tcells.csv", row.names = FALSE)
# P2 vs P3: fold-change filtering ------------------------------------
library(dplyr)
library(readr) # fast readers for delimited text files
# The input is parsed as tab-separated text (note the ".csv" extension)
Exp_Allsample <- read_tsv("Extra/NewFiles/Patients_based_on_celllines_Clusters/Results_3_comparisons/SCT_data_All_samples_Merged_transposed_tab_P2_vs_P3.csv")
# An unnamed first column arrives as "...1"; call it "gene" instead
if (names(Exp_Allsample)[1] == "...1") {
  names(Exp_Allsample)[1] <- "gene"
}
# Append log2 of the P2/P3 fold-change column
Exp_Allsample <- mutate(Exp_Allsample, log2FC = log2(FC_P2_P3))
# Inspect the log2FC distribution before choosing cut-offs
summary(Exp_Allsample$log2FC)
hist(
  Exp_Allsample$log2FC,
  main = "Distribution of log2FC",
  xlab = "log2FC",
  col = "lightblue",
  border = "black"
)
# Cut-offs: keep genes above the positive or below the negative threshold
threshold_positive <- 1
threshold_negative <- -1
# Apply the log2FC cut-offs, then drop genes whose mean is below 0.2 in
# both groups
filtered_data_final <- Exp_Allsample %>%
  filter(
    log2FC > threshold_positive | log2FC < threshold_negative,
    !(mean_P2 < 0.2 & mean_P3 < 0.2)
  )
# Save the surviving genes
write.csv(
  filtered_data_final,
  "Extra/NewFiles/Patients_based_on_celllines_Clusters/Results_3_comparisons/2-filtered_P2_vs_P3.csv",
  row.names = FALSE
)
# Load necessary libraries ------------------------------------
library(dplyr)
library(readr) # 'readr' package is recommended for reading .csv and .tsv files efficiently
# Read the metadata table for all cell lines. The code below filters it on the
# `Harmony_snn_res.0.9` and `orig.ident` columns and writes its first column
# out as the list of cell identifiers.
# Note: the name `data` shadows utils::data() for the rest of the session.
data <- read.csv("/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/Metadata_All_cell_lines.csv")
#' Filter cell metadata by cluster and cell line, then save the result
#'
#' Keeps the rows of `data` whose `Harmony_snn_res.0.9` value is in `clusters`
#' and whose `orig.ident` equals `cell_line`. The kept rows are written to
#' `output_csv` (all columns, with header), and their first column is written
#' to `output_txt` (one value per line, no header, no quoting).
#'
#' Implemented with base R subsetting so the helper does not depend on dplyr
#' being attached (an unqualified `filter()` resolves to `stats::filter()`
#' otherwise).
#'
#' @param data Data frame with `Harmony_snn_res.0.9` and `orig.ident` columns;
#'   its first column is what gets written to `output_txt`.
#' @param clusters Vector of cluster labels to keep.
#' @param cell_line `orig.ident` value to keep.
#' @param output_csv Path of the CSV file receiving the kept rows.
#' @param output_txt Path of the text file receiving the first column.
#' @return The filtered data frame, invisibly.
filter_and_save <- function(data, clusters, cell_line, output_csv, output_txt) {
  required_cols <- c("Harmony_snn_res.0.9", "orig.ident")
  missing_cols <- setdiff(required_cols, colnames(data))
  if (length(missing_cols) > 0) {
    stop(
      "Missing required column(s): ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # which() discards NA comparisons, so rows with a missing cluster or
  # identity are excluded instead of becoming all-NA rows.
  keep <- which(
    data[["Harmony_snn_res.0.9"]] %in% clusters &
      data[["orig.ident"]] == cell_line
  )
  filtered_data <- data[keep, , drop = FALSE]
  rownames(filtered_data) <- NULL

  # Both files are still written when nothing matches; make that visible.
  if (nrow(filtered_data) == 0) {
    warning(
      "No rows matched cell line '", cell_line, "' in clusters ",
      paste(clusters, collapse = ", "), "; writing empty output files.",
      call. = FALSE
    )
  }

  # Save the filtered data as a CSV file
  write.csv(filtered_data, output_csv, row.names = FALSE)
  # Save the first column (e.g., cell identifiers) as a TXT file without a header
  write.table(
    filtered_data[[1]],
    output_txt,
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE
  )

  invisible(filtered_data)
}
# Per-cell-line exports: each cell line is restricted to the clusters of its
# patient group (P1: L1-L2, P2: L3-L4, P3: L5-L7). The jobs run in the listed
# order; local() keeps the helper variables out of the global environment.
local({
  clusters_p1 <- c(3, 8, 10, 18)
  clusters_p2 <- c(1, 2, 13)
  clusters_p3 <- c(4, 7, 9, 6, 16, 19)

  export_jobs <- list(
    list(
      clusters = clusters_p1,
      cell_line = "L1",
      csv = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P1_L1.csv",
      txt = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P1_L1.txt"
    ),
    list(
      clusters = clusters_p1,
      cell_line = "L2",
      csv = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P1_L2.csv",
      txt = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P1_L2.txt"
    ),
    list(
      clusters = clusters_p2,
      cell_line = "L3",
      csv = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P2_L3.csv",
      txt = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P2_L3.txt"
    ),
    list(
      clusters = clusters_p2,
      cell_line = "L4",
      csv = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P2_L4.csv",
      txt = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P2_L4.txt"
    ),
    list(
      clusters = clusters_p3,
      cell_line = "L5",
      csv = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P3_L5.csv",
      txt = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P3_L5.txt"
    ),
    list(
      clusters = clusters_p3,
      cell_line = "L6",
      csv = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P3_L6.csv",
      txt = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P3_L6.txt"
    ),
    list(
      clusters = clusters_p3,
      cell_line = "L7",
      csv = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P3_L7.csv",
      txt = "/home/bioinfo/17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/P3_L7.txt"
    )
  )

  for (job in export_jobs) {
    filter_and_save(
      data = data,
      clusters = job$clusters,
      cell_line = job$cell_line,
      output_csv = job$csv,
      output_txt = job$txt
    )
  }
})
# foreach/doParallel are attached before sourcing the scanner script below -
# presumably because that script relies on them (TODO confirm).
library(foreach)
library(doParallel)
# Relative paths used below ("Extra/NewFiles/...") resolve against this directory.
setwd("/home/bioinfo/17-SingleCellFCscanner/")
# Evaluate the scFCscanner script located in the working directory set above.
source("scFCscanner_028_in_list.r")
# P2_L4 vs PBMC CD4 T cells: fold-change filtering ------------------------
library(dplyr)
library(readr) # fast readers for delimited text files
# The input is parsed as tab-separated text (note the ".csv" extension)
Exp_Allsample <- read_tsv("Extra/NewFiles/Cell_lines/cell_lines/cell_lines_vs_control/P2_L4_vs_PBMC_CD4T_cells.csv")
# An unnamed first column arrives as "...1"; call it "gene" instead
if (names(Exp_Allsample)[1] == "...1") {
  names(Exp_Allsample)[1] <- "gene"
}
# Append log2 of the fold-change column
Exp_Allsample <- mutate(Exp_Allsample, log2FC = log2(FC_P2_L4_PBMC_CD4T_cells))
# Inspect the log2FC distribution before choosing cut-offs
summary(Exp_Allsample$log2FC)
hist(
  Exp_Allsample$log2FC,
  main = "Distribution of log2FC",
  xlab = "log2FC",
  col = "lightblue",
  border = "black"
)
# Asymmetric cut-offs for up- and down-regulated genes
threshold_positive <- 3.5
threshold_negative <- -1
# Step 1: keep genes beyond either log2FC cut-off
filtered_data <- filter(
  Exp_Allsample,
  log2FC > threshold_positive | log2FC < threshold_negative
)
# Step 2: drop genes whose mean is below 0.2 in both groups
filtered_data_final <- filter(
  filtered_data,
  !(mean_P2_L4 < 0.2 & mean_PBMC_CD4T_cells < 0.2)
)
# Save the surviving genes
write.csv(
  filtered_data_final,
  "Extra/NewFiles/Cell_lines/cell_lines/cell_lines_vs_control/filtered_P2_L4_vs_PBMC_CD4T_cells.csv",
  row.names = FALSE
)
# Cluster 19: export the matching cells ------------------------------------
library(dplyr)
library(readr) # fast readers for delimited text files
# Metadata table for all cell lines
data <- read.csv("../17-SingleCellFCscanner/Extra/NewFiles/Cell_lines/Metadata_All_cell_lines.csv")
# Wider cluster selection, kept for reference:
# clusters_of_interest <- c(3, 8, 10, 18, 1, 2, 13, 4, 7, 9, 6, 16, 19)
clusters_of_interest <- 19
# Keep only the rows assigned to the selected cluster(s)
filtered_data <- filter(data, Harmony_snn_res.0.9 %in% clusters_of_interest)
# All metadata columns of the selected rows
write.csv(
  filtered_data,
  "../17-SingleCellFCscanner/Extra/NewFiles/Clusters/filtered_cluster19.csv",
  row.names = FALSE
)
# First metadata column only: one value per line, no header, no quoting.
# Note this path is relative to the working directory, unlike the "../" paths above.
write.table(
  filtered_data[, 1],
  "Extra/NewFiles/Clusters/Cluster19_cells.txt",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)
# Cluster 19 vs PBMC CD4 T cells: fold-change filtering --------------------
library(dplyr)
library(readr) # fast readers for delimited text files
# The input is parsed as tab-separated text (note the ".csv" extension)
Exp_Allsample <- read_tsv("Extra/NewFiles/Clusters/Clusters_vs_control/C19_cells_vs_PBMC_CD4T_cells.csv")
# An unnamed first column arrives as "...1"; call it "gene" instead
if (names(Exp_Allsample)[1] == "...1") {
  names(Exp_Allsample)[1] <- "gene"
}
# Append log2 of the fold-change column
Exp_Allsample <- mutate(
  Exp_Allsample,
  log2FC = log2(FC_Cluster19_cells_PBMC_CD4T_cells)
)
# Inspect the log2FC distribution before choosing cut-offs
summary(Exp_Allsample$log2FC)
hist(
  Exp_Allsample$log2FC,
  main = "Distribution of log2FC",
  xlab = "log2FC",
  col = "lightblue",
  border = "black"
)
# Asymmetric cut-offs for up- and down-regulated genes
threshold_positive <- 4
threshold_negative <- -2
# Step 1: keep genes beyond either log2FC cut-off
filtered_data <- filter(
  Exp_Allsample,
  log2FC > threshold_positive | log2FC < threshold_negative
)
# Step 2: drop genes whose mean is below 0.2 in both groups
filtered_data_final <- filter(
  filtered_data,
  !(mean_Cluster19_cells < 0.2 & mean_PBMC_CD4T_cells < 0.2)
)
# Save the surviving genes
write.csv(
  filtered_data_final,
  "Extra/NewFiles/Clusters/Clusters_vs_control/2-filtered_C19_vs_PBMC_CD4T_cells.csv",
  row.names = FALSE
)
# Split the filtered cluster-19 genes into up-/down-regulated name lists ----
# Define file paths
input_file <- "Extra/NewFiles/Clusters/Clusters_vs_control/2-filtered_C19_vs_PBMC_CD4T_cells.csv"
output_folder <- "Extra/NewFiles/Clusters/Clusters_vs_control/Enrichment_files/"
# Ensure output folder exists
if (!dir.exists(output_folder)) {
  dir.create(output_folder, recursive = TRUE)
}
# Load the data; on failure, report which file was involved and why, instead
# of discarding the underlying error.
data <- tryCatch(
  read_csv(input_file),
  error = function(e) {
    stop(
      "Error reading input file '", input_file,
      "'. Check the file path or format. Underlying error: ",
      conditionMessage(e),
      call. = FALSE
    )
  }
)
# Check that the necessary columns exist, naming the ones that do not
missing_cols <- setdiff(c("log2FC", "gene"), colnames(data))
if (length(missing_cols) > 0) {
  stop(
    "Required columns ('log2FC' and 'gene') are missing in the data: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}
# Filter for upregulated and downregulated genes (the sign of log2FC decides)
upregulated_genes <- data %>% filter(log2FC > 0)
downregulated_genes <- data %>% filter(log2FC < 0)
# Extract only gene names as vectors
upregulated_gene_names <- upregulated_genes %>% pull(gene)
downregulated_gene_names <- downregulated_genes %>% pull(gene)
# Define output file paths (fixed file names: an earlier run writing to the
# same folder is overwritten)
upregulated_file <- file.path(output_folder, "upregulated_gene_names.txt")
downregulated_file <- file.path(output_folder, "downregulated_gene_names.txt")
# Save the gene names to text files, one name per line
write_lines(upregulated_gene_names, upregulated_file)
write_lines(downregulated_gene_names, downregulated_file)
# Print the number of upregulated and downregulated genes
cat("Number of upregulated genes:", length(upregulated_gene_names), "\n")
cat("Number of downregulated genes:", length(downregulated_gene_names), "\n")
cat("Gene names saved to:\n")
cat(" Upregulated genes: ", upregulated_file, "\n")
cat(" Downregulated genes: ", downregulated_file, "\n")
# B memory cells inside the cell-line clusters -----------------------------
library(dplyr)
# Metadata of the cells previously selected by cell-line cluster
data <- read.csv("Extra/NewFiles/Cell_lines/filtered_cells_of_cell_lines_by_cluster.csv")
# Keep rows whose level-2 prediction contains "B memory" and whose level-3
# prediction contains "B memory lambda"
filtered_data <- filter(
  data,
  grepl("B memory", predicted.celltype.l2),
  grepl("B memory lambda", predicted.celltype.l3)
)
# All metadata columns of the selected rows, with header
write.csv(
  filtered_data,
  "Extra/NewFiles/B_memory_in_cellline_clusters.csv",
  row.names = FALSE
)
# First metadata column only: one value per line, no header, no quoting
write.table(
  filtered_data[, 1],
  "Extra/NewFiles/B_memory_cells_in_cellline_clusters.txt",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)
# # Load your CSV file
# data <- read.csv("Extra/NewFiles/Cell_lines/Metadata_All_cell_lines.csv")
#
#
# # Define the clusters of interest
# clusters_of_interest <- c(3, 8, 10, 18, 1, 2, 13, 4, 7, 9, 6, 16, 19)
#
# # Filter cells based on the specified clusters
# filtered_data <- data %>%
# filter(Harmony_snn_res.0.9 %in% clusters_of_interest)
#
# # Write the filtered data to a new CSV file
# write.csv(filtered_data, "Extra/NewFiles/Cell_lines/filtered_cells_of_cell_lines_by_cluster.csv", row.names = FALSE)
#
# # Save the first column (PBMC cells) without the header to a txt file
# write.table(filtered_data[, 1], "Extra/NewFiles/Cell_lines/All_cell_lines_cells.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
#
# # Load your CSV file
# data <- read.csv("Extra/NewFiles/Cell_lines/Metadata_All_cell_lines.csv")
#
# # To save P1 (L1+L2)
# # Define the clusters of interest
# clusters_of_interest <- c(3, 8, 10, 18)
#
# # Filter cells based on the specified clusters
# filtered_data <- data %>%
# filter(Harmony_snn_res.0.9 %in% clusters_of_interest)
#
# # Write the filtered data to a new CSV file
# write.csv(filtered_data, "Extra/NewFiles/Patients_based_on_celllines(Clusters)/P1.csv", row.names = FALSE)
#
# # Save the first column (PBMC cells) without the header to a txt file
# write.table(filtered_data[, 1], "Extra/NewFiles/Patients_based_on_celllines(Clusters)/P1.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
#
#
# # To save P2 (L3+L4)
# # Define the clusters of interest
# clusters_of_interest <- c(1, 2, 13)
#
# # Filter cells based on the specified clusters
# filtered_data <- data %>%
# filter(Harmony_snn_res.0.9 %in% clusters_of_interest)
#
# # Write the filtered data to a new CSV file
# write.csv(filtered_data, "Extra/NewFiles/Patients_based_on_celllines(Clusters)/P2.csv", row.names = FALSE)
#
# # Save the first column (PBMC cells) without the header to a txt file
# write.table(filtered_data[, 1], "Extra/NewFiles/Patients_based_on_celllines(Clusters)/P2.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
#
#
#
# # To save P3 (L5+L6+L7)
# # Define the clusters of interest
# clusters_of_interest <- c(4, 7, 9, 6, 16, 19)
#
# # Filter cells based on the specified clusters
# filtered_data <- data %>%
# filter(Harmony_snn_res.0.9 %in% clusters_of_interest)
#
# # Write the filtered data to a new CSV file
# write.csv(filtered_data, "Extra/NewFiles/Patients_based_on_celllines(Clusters)/P3.csv", row.names = FALSE)
#
# # Save the first column (PBMC cells) without the header to a txt file
# write.table(filtered_data[, 1], "Extra/NewFiles/Patients_based_on_celllines(Clusters)/P3.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
# foreach/doParallel are attached before sourcing the scanner script below -
# presumably because that script relies on them (TODO confirm).
library(foreach)
library(doParallel)
# Relative paths used below ("Extra/NewFiles/...") resolve against this directory.
setwd("/home/bioinfo/17-SingleCellFCscanner/")
# Evaluate the scFCscanner script located in the working directory set above.
source("scFCscanner_028_in_list.r")
# B memory cells vs PBMC CD4 T cells: fold-change filtering ----------------
library(dplyr)
library(readr) # fast readers for delimited text files
# The input is parsed as tab-separated text (note the ".csv" extension)
Exp_Allsample <- read_tsv("Extra/NewFiles/Other_cells_in_celllines/2-FC_scanner_Results/SCT_data_All_samples_Merged_transposed_tab_B_memory_cells_in_cellline_clusters_vs_PBMC_CD4T_cells.csv")
# An unnamed first column arrives as "...1"; call it "gene" instead
if (names(Exp_Allsample)[1] == "...1") {
  names(Exp_Allsample)[1] <- "gene"
}
# Append log2 of the fold-change column
Exp_Allsample <- mutate(
  Exp_Allsample,
  log2FC = log2(FC_B_memory_cells_in_cellline_clusters_PBMC_CD4T_cells)
)
# Inspect the log2FC distribution before choosing cut-offs
summary(Exp_Allsample$log2FC)
hist(
  Exp_Allsample$log2FC,
  main = "Distribution of log2FC",
  xlab = "log2FC",
  col = "lightblue",
  border = "black"
)
# Asymmetric cut-offs for up- and down-regulated genes
threshold_positive <- 2.5
threshold_negative <- -1.5
# Step 1: keep genes beyond either log2FC cut-off
filtered_data <- filter(
  Exp_Allsample,
  log2FC > threshold_positive | log2FC < threshold_negative
)
# Step 2: drop genes whose mean is below 0.2 in both groups
filtered_data_final <- filter(
  filtered_data,
  !(mean_B_memory_cells_in_cellline_clusters < 0.2 & mean_PBMC_CD4T_cells < 0.2)
)
# Save the surviving genes
write.csv(
  filtered_data_final,
  "Extra/NewFiles/Other_cells_in_celllines/3-files_for_Enrichment/2-filtered_B_memory_cells_in_cellline_clusters_vs_PBMC_CD4T_cells.csv",
  row.names = FALSE
)
# Split a filtered gene table into up-/down-regulated name lists ------------
# Define file paths
# NOTE(review): this reads the NK_Proliferating result, whereas the step just
# before this one wrote the B_memory result
# ("2-filtered_B_memory_cells_in_cellline_clusters_vs_PBMC_CD4T_cells.csv").
# Confirm that this is the intended input.
input_file <- "Extra/NewFiles/Other_cells_in_celllines/3-files_for_Enrichment/2-filtered_NK_Proliferating_cells_in_cellline_clusters_vs_PBMC_CD4T_cells.csv"
output_folder <- "Extra/NewFiles/Other_cells_in_celllines/3-files_for_Enrichment/Enrichment_files/"
# Ensure output folder exists
if (!dir.exists(output_folder)) {
  dir.create(output_folder, recursive = TRUE)
}
# Load the data; on failure, report which file was involved and why, instead
# of discarding the underlying error.
data <- tryCatch(
  read_csv(input_file),
  error = function(e) {
    stop(
      "Error reading input file '", input_file,
      "'. Check the file path or format. Underlying error: ",
      conditionMessage(e),
      call. = FALSE
    )
  }
)
# Check that the necessary columns exist, naming the ones that do not
missing_cols <- setdiff(c("log2FC", "gene"), colnames(data))
if (length(missing_cols) > 0) {
  stop(
    "Required columns ('log2FC' and 'gene') are missing in the data: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}
# Filter for upregulated and downregulated genes (the sign of log2FC decides)
upregulated_genes <- data %>% filter(log2FC > 0)
downregulated_genes <- data %>% filter(log2FC < 0)
# Extract only gene names as vectors
upregulated_gene_names <- upregulated_genes %>% pull(gene)
downregulated_gene_names <- downregulated_genes %>% pull(gene)
# Define output file paths (fixed file names: an earlier run writing to the
# same folder is overwritten)
upregulated_file <- file.path(output_folder, "upregulated_gene_names.txt")
downregulated_file <- file.path(output_folder, "downregulated_gene_names.txt")
# Save the gene names to text files, one name per line
write_lines(upregulated_gene_names, upregulated_file)
write_lines(downregulated_gene_names, downregulated_file)
# Print the number of upregulated and downregulated genes
cat("Number of upregulated genes:", length(upregulated_gene_names), "\n")
cat("Number of downregulated genes:", length(downregulated_gene_names), "\n")
cat("Gene names saved to:\n")
cat(" Upregulated genes: ", upregulated_file, "\n")
cat(" Downregulated genes: ", downregulated_file, "\n")
# Load necessary libraries ------------------------------------
library(dplyr)
# Console output captured when dplyr was attached (kept as a comment so the
# script still parses):
#   Attaching package: 'dplyr'
#   The following objects are masked from 'package:stats': filter, lag
#   The following objects are masked from 'package:base':
#     intersect, setdiff, setequal, union
library(readr) # 'readr' package is recommended for reading .csv and .tsv files efficiently
# Read the TSV file into R
Exp_Allsample <- read_tsv("Extra/NewFiles/Results/Cell_lines_vs_CD4Tcells/SCT_All_cell_lines_cells_vs_PBMC_CD4T_cells.tsv")
# Console output captured from read_tsv() (kept as a comment so the script
# still parses):
#   New names: Rows: 27417 Columns: 8
#   Delimiter: "\t"
#   chr (1): ...1
#   dbl (7): mean_All_cell_lines_cells, mean_PBMC_CD4T_cells,
#            Relative_variance_All_cell_lines_cells,
#            Relative_variance_PBMC_CD4T_cells, FC_All_cell_lines_cel... (truncated)
#   Use `spec()` to retrieve the full column specification for this data.
#   Specify the column types or set `show_col_types = FALSE` to quiet this message.
# The unnamed first column is reported as "...1" above; rename it to "gene"
if (colnames(Exp_Allsample)[1] == "...1") {
  colnames(Exp_Allsample)[1] <- "gene"
}
# Add log2 of the fold-change column as a new column
Exp_Allsample <- Exp_Allsample %>%
  mutate(log2FC = log2(FC_All_cell_lines_cells_PBMC_CD4T_cells))