Subsets.of.csv from Big CSV

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Directory containing the CSV files
csv_directory <- "/Users/usri/Desktop/JUNE25/RG.oRG/result"

# List all CSV files in the directory
csv_files <- list.files(
  path = csv_directory, pattern = "\\.csv$", full.names = TRUE
)

# Read one CSV file and tag every non-key column with the file's base name
# (e.g. `pval` read from `groupA.csv` becomes `pval.groupA`).
#
# Why: every input file carries the same column names, so repeated calls to
# `merge()` with the default `.x`/`.y` suffixes produce duplicated column
# names from the fourth file onward. Tagging by file keeps every column
# unique and traceable to its source.
#
# csv_file: path to a CSV file.
# key:      name of the column the tables are joined on; left untagged.
# Returns a data frame. Stops with an error if `key` is not a column.
read_tagged_csv <- function(csv_file, key = "name") {
  df <- read.csv(csv_file)
  if (!key %in% names(df)) {
    stop("Column '", key, "' not found in ", csv_file, call. = FALSE)
  }
  tag <- tools::file_path_sans_ext(basename(csv_file))
  is_value <- names(df) != key
  names(df)[is_value] <- paste(names(df)[is_value], tag, sep = ".")
  df
}

# Full outer join of all CSV files on `key`.
#
# csv_files: character vector of CSV paths.
# key:       name of the join column shared by every file.
# Returns one data frame with a row per distinct `key` value and one set of
# file-tagged columns per input file; an empty data frame if `csv_files` is
# empty.
merge_csv_files <- function(csv_files, key = "name") {
  if (length(csv_files) == 0) {
    return(data.frame())
  }
  tables <- lapply(csv_files, read_tagged_csv, key = key)
  # Fold the list pairwise instead of growing a data frame inside a loop.
  Reduce(
    function(left, right) merge(left, right, by = key, all = TRUE),
    tables
  )
}

if (length(csv_files) == 0) {
  warning("No CSV files found in ", csv_directory, call. = FALSE)
}

# Merge every file on the shared "name" column
merged_data <- merge_csv_files(csv_files, key = "name")

## Warning in merge.data.frame(merged_data, df, by = "name", all = TRUE): column
## names 'pval.x', 'adj_pval.x', 'f_statistic.x', 'df1.x', 'df2.x', 'lfc.x',
## 'diffexpressed.x', 'Type.x', 'pval.y', 'adj_pval.y', 'f_statistic.y', 'df1.y',
## 'df2.y', 'lfc.y', 'diffexpressed.y', 'Type.y' are duplicated in the result

## Warning in merge.data.frame(merged_data, df, by = "name", all = TRUE): column
## names 'pval.x', 'adj_pval.x', 'f_statistic.x', 'df1.x', 'df2.x', 'lfc.x',
## 'diffexpressed.x', 'Type.x', 'pval.y', 'adj_pval.y', 'f_statistic.y', 'df1.y',
## 'df2.y', 'lfc.y', 'diffexpressed.y', 'Type.y' are duplicated in the result

## Warning in merge.data.frame(merged_data, df, by = "name", all = TRUE): column
## names 'pval.x', 'adj_pval.x', 'f_statistic.x', 'df1.x', 'df2.x', 'lfc.x',
## 'diffexpressed.x', 'Type.x', 'pval.y', 'adj_pval.y', 'f_statistic.y', 'df1.y',
## 'df2.y', 'lfc.y', 'diffexpressed.y', 'Type.y', 'pval.x', 'adj_pval.x',
## 'f_statistic.x', 'df1.x', 'df2.x', 'lfc.x', 'diffexpressed.x', 'Type.x',
## 'pval.y', 'adj_pval.y', 'f_statistic.y', 'df1.y', 'df2.y', 'lfc.y',
## 'diffexpressed.y', 'Type.y' are duplicated in the result

## Warning in merge.data.frame(merged_data, df, by = "name", all = TRUE): column
## names 'pval.x', 'adj_pval.x', 'f_statistic.x', 'df1.x', 'df2.x', 'lfc.x',
## 'diffexpressed.x', 'Type.x', 'pval.y', 'adj_pval.y', 'f_statistic.y', 'df1.y',
## 'df2.y', 'lfc.y', 'diffexpressed.y', 'Type.y', 'pval.x', 'adj_pval.x',
## 'f_statistic.x', 'df1.x', 'df2.x', 'lfc.x', 'diffexpressed.x', 'Type.x',
## 'pval.y', 'adj_pval.y', 'f_statistic.y', 'df1.y', 'df2.y', 'lfc.y',
## 'diffexpressed.y', 'Type.y' are duplicated in the result

## Warning in merge.data.frame(merged_data, df, by = "name", all = TRUE): column
## names 'pval.x', 'adj_pval.x', 'f_statistic.x', 'df1.x', 'df2.x', 'lfc.x',
## 'diffexpressed.x', 'Type.x', 'pval.y', 'adj_pval.y', 'f_statistic.y', 'df1.y',
## 'df2.y', 'lfc.y', 'diffexpressed.y', 'Type.y', 'pval.x', 'adj_pval.x',
## 'f_statistic.x', 'df1.x', 'df2.x', 'lfc.x', 'diffexpressed.x', 'Type.x',
## 'pval.y', 'adj_pval.y', 'f_statistic.y', 'df1.y', 'df2.y', 'lfc.y',
## 'diffexpressed.y', 'Type.y', 'pval.x', 'adj_pval.x', 'f_statistic.x', 'df1.x',
## 'df2.x', 'lfc.x', 'diffexpressed.x', 'Type.x', 'pval.y', 'adj_pval.y',
## 'f_statistic.y', 'df1.y', 'df2.y', 'lfc.y', 'diffexpressed.y', 'Type.y' are
## duplicated in the result

## Warning in merge.data.frame(merged_data, df, by = "name", all = TRUE): column
## names 'pval.x', 'adj_pval.x', 'f_statistic.x', 'df1.x', 'df2.x', 'lfc.x',
## 'diffexpressed.x', 'Type.x', 'pval.y', 'adj_pval.y', 'f_statistic.y', 'df1.y',
## 'df2.y', 'lfc.y', 'diffexpressed.y', 'Type.y', 'pval.x', 'adj_pval.x',
## 'f_statistic.x', 'df1.x', 'df2.x', 'lfc.x', 'diffexpressed.x', 'Type.x',
## 'pval.y', 'adj_pval.y', 'f_statistic.y', 'df1.y', 'df2.y', 'lfc.y',
## 'diffexpressed.y', 'Type.y', 'pval.x', 'adj_pval.x', 'f_statistic.x', 'df1.x',
## 'df2.x', 'lfc.x', 'diffexpressed.x', 'Type.x', 'pval.y', 'adj_pval.y',
## 'f_statistic.y', 'df1.y', 'df2.y', 'lfc.y', 'diffexpressed.y', 'Type.y' are
## duplicated in the result

# Write the full outer-joined table to the working directory.
write.csv(x = merged_data, file = "merged_data.csv", row.names = FALSE)

# Keep only rows with no NA in any column, then write that subset as well.
x <- na.omit(merged_data)
write.csv(
  x = as.data.frame(x),
  file = "merged_data.RG.x.csv.csv",
  row.names = FALSE
)

Subsets.of.csv from Big CSV

Upasna Srivastava

2023-06-24