title: “DNA FASTQ to BigWig Pipeline Report”

subtitle: “DNAfastqBigWig_human_main_v5_31aug2025.sh Execution Summary”

author: “Automated Pipeline Report”

date: “2026-01-12”

output:

html_document:

toc: true

toc_depth: 3

toc_float: true

theme: bootstrap

highlight: tango

code_folding: hide

df_print: paged

pdf_document:

toc: true

toc_depth: 3

number_sections: true

highlight: tango

df_print: kable

fig_caption: true

keep_tex: false

latex_engine: pdflatex

includes:

  in_header:

    - \usepackage{booktabs}

    - \usepackage{longtable}

    - \usepackage{array}

    - \usepackage{multirow}

    - \usepackage{wrapfig}

    - \usepackage{float}

    - \usepackage{colortbl}

    - \usepackage{pdflscape}

    - \usepackage{tabu}

    - \usepackage{threeparttable}

    - \usepackage{threeparttablex}

    - \usepackage[normalem]{ulem}

    - \usepackage{makecell}

    - \usepackage{xcolor}

geometry: "margin=1in"

fontsize: 11pt

linestretch: 1.2

Executive Summary

This report documents the execution of the DNA FASTQ to BigWig processing pipeline (DNAfastqBigWig_human_main_v5_31aug2025.sh) which performs comprehensive analysis of paired-end DNA sequencing data from raw FASTQ files to normalized genome coverage tracks.

Pipeline Overview

The pipeline executes the following major steps:

Quality Control (Initial) - FastQC analysis of raw FASTQ files
Adapter Trimming - TrimGalore processing for quality and adapter removal
Quality Control (Post-trimming) - FastQC analysis of trimmed files
Genome Alignment - Bowtie2 mapping to hg38 reference genome
Duplicate Removal - Picard deduplication of aligned reads
Coverage Generation - Creation of normalized bedGraph and bigWig files
Quality Reporting - MultiQC summaries at each major step

System Information

Computing Environment

# System information

system_info <- data.frame(

  Parameter = c("Hostname", "User", "Operating System", "R Version", "Report Generated", "Pipeline Script"),

  Value = c(

    "biolserv",
    "micgdu",

    system("uname -a", intern = TRUE),

    R.version.string,

    "2026-01-12 23:54:22",

    "DNAfastqBigWig_human_main_v5_31aug2025.sh"

  ),

  stringsAsFactors = FALSE

)

# Format table based on output

if(output_format == "latex") {

  kable(system_info, caption = "System and Environment Information",

        booktabs = TRUE, longtable = FALSE) %>%

    kableExtra::kable_styling(latex_options = c("striped", "hold_position"))

} else {

  kable(system_info, caption = "System and Environment Information")

}

System and Environment Information
Parameter	Value
Hostname	biolserv
User	micgdu
Operating System	Linux biolserv 6.8.0-90-generic #91-Ubuntu SMP PREEMPT_DYNAMIC Tue Nov 18 14:14:30 UTC 2025 x86_64 x86_64 x86_64 GNU/Linux
R Version	R version 4.3.2 (2023-10-31)
Report Generated	2026-01-12 23:54:22
Pipeline Script	DNAfastqBigWig_human_main_v5_31aug2025.sh

Hardware Resources

# Get hardware information

cpu_info <- system("nproc", intern = TRUE)

memory_info <- system("free -h | grep '^Mem:' | awk '{print $2}'", intern = TRUE)

disk_info <- system("df -h . | tail -1 | awk '{print $2\" (\"$4\" available)\"}'", intern = TRUE)

hardware_info <- data.frame(

  Resource = c("CPU Cores", "Total Memory", "Disk Space (Available)"),

  Specification = c(cpu_info, memory_info, disk_info),

  stringsAsFactors = FALSE

)

# Format table based on output

if(output_format == "latex") {

  kable(hardware_info, caption = "Hardware Resources",

        booktabs = TRUE, longtable = FALSE) %>%

    kableExtra::kable_styling(latex_options = c("striped", "hold_position"))

} else {

  kable(hardware_info, caption = "Hardware Resources")

}

Hardware Resources
Resource	Specification
CPU Cores	144
Total Memory	503Gi
Disk Space (Available)	16T (811G available)

Software Versions

# Check software versions

get_version <- function(cmd) {

  tryCatch({

    result <- system(cmd, intern = TRUE)[1]

    if(is.na(result) || result == "") return("Not available")

    return(result)

  }, error = function(e) "Not available")

}

software_versions <- data.frame(

  Software = c("FastQC", "TrimGalore", "Bowtie2", "Samtools", "Picard", "Bedtools", "MultiQC"),

  Version_Command = c(

    "fastqc --version 2>&1 | head -1",

    "trim_galore --version 2>&1 | head -1",

    "bowtie2 --version 2>&1 | head -1",

    "samtools --version 2>&1 | head -1",

    "java -jar /home/micgdu/software/picard.jar MarkDuplicates --version 2>&1 | head -1",

    "bedtools --version 2>&1 | head -1",

    "multiqc --version 2>&1 | head -1"

  ),

  stringsAsFactors = FALSE

)

software_versions$Version <- sapply(software_versions$Version_Command, get_version)

software_versions$Version_Command <- NULL

# Format table based on output

if(output_format == "latex") {

  kable(software_versions, caption = "Software Versions Used",

        booktabs = TRUE, longtable = FALSE) %>%

    kableExtra::kable_styling(latex_options = c("striped", "hold_position"))

} else {

  kable(software_versions, caption = "Software Versions Used")

}

Software Versions Used
Software	Version
FastQC	FastQC v0.12.1
TrimGalore	Not available
Bowtie2	/home/micgdu/software/bowtie2/bowtie2-2.5.0-linux-x86_64/bowtie2-align-s version 2.5.0
Samtools	samtools 1.19.2
Picard	Version:3.3.0
Bedtools	bedtools v2.31.1
MultiQC	multiqc, version 1.30

Pipeline Execution Analysis

Script-Level Resource Usage and Timing

# Define the scripts used in the pipeline with their resource profiles

pipeline_scripts <- data.frame(

  Step = c(

    "1. Initial FastQC",

    "2. TrimGalore",

    "3. Post-trim FastQC",

    "4. Bowtie2 Alignment",

    "5. Picard Deduplication",

    "6. Coverage Generation"

  ),

  Script_Name = c(

    "fastqc_batch_v1_30aug2025.sh",

    "trimgalore_batch_final_v2_30aug2025.sh",

    "fastqc_batch_v1_30aug2025.sh",

    "bowtie2_human_batch_v1_31aug.sh",

    "picard_deduplication_batch_31aug2025_v8.sh",

    "genomecoverage_batch_v1_31aug2025.sh"

  ),

  Lower_Level_Script = c(

    "Built-in FastQC",

    "Built-in TrimGalore",

    "Built-in FastQC",

    "bowtie2_dovetail_pairedEnd_Hsapiens_31Aug25.sh",

    "picard_deduplication_28aug2025.sh",

    "genomeCoverage_DNA_human_26aug2025.sh"

  ),

  Concurrent_Jobs = c("2x parameter", "1x parameter", "2x parameter", "Parameter", "Parameter", "Parameter"),

  Threads_Per_Job = c("10 (fixed)", "8 (fixed)", "10 (fixed)", "15 (Bowtie2)", "1 (Picard)", "10 (samtools)"),

  Memory_Per_Job = c("~2GB", "~4GB", "~2GB", "~8GB", "128GB (Java heap)", "~16GB"),

  Primary_Tool = c("FastQC", "TrimGalore + cutadapt", "FastQC", "Bowtie2 + samtools", "Picard MarkDuplicates", "bedtools + samtools"),

  stringsAsFactors = FALSE

)

# Format table based on output

if(output_format == "latex") {

  kable(pipeline_scripts, caption = "Pipeline Scripts and Resource Requirements",

        booktabs = TRUE, longtable = TRUE) %>%

    kableExtra::kable_styling(latex_options = c("striped", "repeat_header"),

                            font_size = 9) %>%

    kableExtra::landscape()

} else {

  kable(pipeline_scripts, caption = "Pipeline Scripts and Resource Requirements")

}

Pipeline Scripts and Resource Requirements
Step	Script_Name	Lower_Level_Script	Concurrent_Jobs	Threads_Per_Job	Memory_Per_Job	Primary_Tool
Initial FastQC	fastqc_batch_v1_30aug2025.sh	Built-in FastQC	2x parameter	10 (fixed)	~2GB	FastQC
TrimGalore	trimgalore_batch_final_v2_30aug2025.sh	Built-in TrimGalore	1x parameter	8 (fixed)	~4GB	TrimGalore + cutadapt
Post-trim FastQC	fastqc_batch_v1_30aug2025.sh	Built-in FastQC	2x parameter	10 (fixed)	~2GB	FastQC
Bowtie2 Alignment	bowtie2_human_batch_v1_31aug.sh	bowtie2_dovetail_pairedEnd_Hsapiens_31Aug25.sh	Parameter	15 (Bowtie2)	~8GB	Bowtie2 + samtools
Picard Deduplication	picard_deduplication_batch_31aug2025_v8.sh	picard_deduplication_28aug2025.sh	Parameter	1 (Picard)	128GB (Java heap)	Picard MarkDuplicates
Coverage Generation	genomecoverage_batch_v1_31aug2025.sh	genomeCoverage_DNA_human_26aug2025.sh	Parameter	10 (samtools)	~16GB	bedtools + samtools

Theoretical Resource Consumption

# Calculate theoretical resource usage based on script parameters

calculate_resources <- function(max_jobs_param = 8) {

  resource_calc <- data.frame(

    Step = pipeline_scripts$Step,

    Jobs = c(

      2 * max_jobs_param, # FastQC: 2x parameter

      1 * max_jobs_param, # TrimGalore: 1x parameter

      2 * max_jobs_param, # FastQC: 2x parameter

      1 * max_jobs_param, # Bowtie2: 1x parameter

      1 * max_jobs_param, # Picard: 1x parameter

      1 * max_jobs_param  # Coverage: 1x parameter

    ),

    Threads_Per_Job = c(10, 8, 10, 15, 1, 10),

    Memory_GB_Per_Job = c(2, 4, 2, 8, 128, 16),

    stringsAsFactors = FALSE

  )

  resource_calc$Total_Threads <- resource_calc$Jobs * resource_calc$Threads_Per_Job

  resource_calc$Total_Memory_GB <- resource_calc$Jobs * resource_calc$Memory_GB_Per_Job

  return(resource_calc)

}

# Calculate for default parameter of 8

resource_usage <- calculate_resources(8)

# Format table based on output

if(output_format == "latex") {

  kable(resource_usage, caption = "Theoretical Resource Usage (max\\_jobs parameter = 8)",

        booktabs = TRUE, longtable = FALSE) %>%

    kableExtra::kable_styling(latex_options = c("striped", "hold_position"),

                            font_size = 10)

} else {

  kable(resource_usage, caption = "Theoretical Resource Usage (max_jobs parameter = 8)")

}

Theoretical Resource Usage (max_jobs parameter = 8)
Step	Jobs	Threads_Per_Job	Memory_GB_Per_Job	Total_Threads	Total_Memory_GB
Initial FastQC	16	10	2	160	32
TrimGalore	8	8	4	64	32
Post-trim FastQC	16	10	2	160	32
Bowtie2 Alignment	8	15	8	120	64
Picard Deduplication	8	1	128	8	1024
Coverage Generation	8	10	16	80	128

# Summary statistics

peak_threads <- max(resource_usage$Total_Threads)

peak_memory <- max(resource_usage$Total_Memory_GB)

total_thread_hours <- sum(resource_usage$Total_Threads) * 2 # Assuming 2 hours average per step

Peak Resource Requirements:

Maximum concurrent threads: 160
Maximum memory usage: 1024 GB
Total computational thread-hours: ~1184 (estimated for medium dataset)

File System Analysis

Directory Structure

# List the directory structure created by the pipeline

directories <- c(

  "fastQC/",

  "fastQC/fastQC_unTrimmed/",

  "fastQC/fastQC_trimmed/",

  "multiQC/",

  "multiQC/multiQC_unTrimmed/",

  "multiQC/multiQC_trimmed/",

  "multiQC/multiQC_alignments/",

  "multiQC/multiQC_deduplication/",

  "trimmedFastq/",

  "bams/",

  "dedupBams/",

  "bedGraph/",

  "NormBedGraph/",

  "bigwig/"

)

dir_info <- data.frame(

  Directory = directories,

  Purpose = c(

    "FastQC reports root directory",

    "FastQC reports for raw FASTQ files",

    "FastQC reports for trimmed FASTQ files",

    "MultiQC reports root directory",

    "MultiQC summary for raw FASTQ analysis",

    "MultiQC summary for trimmed FASTQ analysis",

    "MultiQC summary for alignment statistics",

    "MultiQC summary for deduplication statistics",

    "Trimmed FASTQ files (*_val_*.fq.gz)",

    "Aligned BAM files (*_sorted_stChr.bam)",

    "Deduplicated BAM files (*_dedup.bam)",

    "Raw bedGraph coverage files",

    "Normalized bedGraph coverage files",

    "BigWig coverage files (*_Snorm.bw)"

  ),

  stringsAsFactors = FALSE

)

# Format table based on output

if(output_format == "latex") {

  kable(dir_info, caption = "Pipeline Output Directory Structure",

        booktabs = TRUE, longtable = TRUE) %>%

    kableExtra::kable_styling(latex_options = c("striped", "repeat_header"),

                            font_size = 9)

} else {

  kable(dir_info, caption = "Pipeline Output Directory Structure")

}

Pipeline Output Directory Structure
Directory	Purpose
fastQC/	FastQC reports root directory
fastQC/fastQC_unTrimmed/	FastQC reports for raw FASTQ files
fastQC/fastQC_trimmed/	FastQC reports for trimmed FASTQ files
multiQC/	MultiQC reports root directory
multiQC/multiQC_unTrimmed/	MultiQC summary for raw FASTQ analysis
multiQC/multiQC_trimmed/	MultiQC summary for trimmed FASTQ analysis
multiQC/multiQC_alignments/	MultiQC summary for alignment statistics
multiQC/multiQC_deduplication/	MultiQC summary for deduplication statistics
trimmedFastq/	Trimmed FASTQ files (val.fq.gz)
bams/	Aligned BAM files (*_sorted_stChr.bam)
dedupBams/	Deduplicated BAM files (*_dedup.bam)
bedGraph/	Raw bedGraph coverage files
NormBedGraph/	Normalized bedGraph coverage files
bigwig/	BigWig coverage files (*_Snorm.bw)

File Size Analysis

# Function to get file sizes and counts

get_directory_info <- function(dir_path) {

  if (!dir.exists(dir_path)) {

    return(data.frame(

      Directory = basename(dir_path),

      Files = 0,

      Total_Size_GB = 0,

      Avg_Size_MB = 0,

      stringsAsFactors = FALSE

    ))

  }

  files <- list.files(dir_path, recursive = TRUE, full.names = TRUE)

  if (length(files) == 0) {

    return(data.frame(

      Directory = basename(dir_path),

      Files = 0,

      Total_Size_GB = 0,

      Avg_Size_MB = 0,

      stringsAsFactors = FALSE

    ))

  }

  file_info <- file.info(files)

  total_size_bytes <- sum(file_info$size, na.rm = TRUE)

  total_size_gb <- total_size_bytes / (1024^3)

  avg_size_mb <- (total_size_bytes / length(files)) / (1024^2)

  data.frame(

    Directory = basename(dir_path),

    Files = length(files),

    Total_Size_GB = round(total_size_gb, 2),

    Avg_Size_MB = round(avg_size_mb, 2),

    stringsAsFactors = FALSE

  )

}

# Analyze each directory

dir_analysis <- do.call(rbind, lapply(directories, function(d) {

  get_directory_info(file.path(getwd(), d))

}))

# Add totals row

totals <- data.frame(

  Directory = "**TOTAL**",

  Files = sum(dir_analysis$Files),

  Total_Size_GB = sum(dir_analysis$Total_Size_GB),

  Avg_Size_MB = round(mean(dir_analysis$Avg_Size_MB), 2),

  stringsAsFactors = FALSE

)

dir_analysis_with_totals <- rbind(dir_analysis, totals)

# Format table based on output

if(output_format == "latex") {

  kable(dir_analysis_with_totals, caption = "File System Usage Analysis",

        booktabs = TRUE, longtable = FALSE) %>%

    kableExtra::kable_styling(latex_options = c("striped", "hold_position"))

} else {

  kable(dir_analysis_with_totals, caption = "File System Usage Analysis")

}

File System Usage Analysis
Directory	Files	Total_Size_GB	Avg_Size_MB
fastQC	98	0.03	0.28
fastQC_unTrimmed	49	0.01	0.28
fastQC_trimmed	49	0.01	0.28
multiQC	145	0.03	0.21
multiQC_unTrimmed	53	0.01	0.19
multiQC_trimmed	57	0.01	0.18
multiQC_alignments	16	0.01	0.34
multiQC_deduplication	19	0.01	0.28
trimmedFastq	38	18.14	488.87
bams	37	23.00	636.61
dedupBams	71	14.97	215.84
bedGraph	0	0.00	0.00
NormBedGraph	0	0.00	0.00
bigwig	6	1.35	230.80
TOTAL	638	57.58	112.44

Log and Report Analysis

Batch Processing Logs

# Function to find and analyze log directories

find_log_dirs <- function(pattern) {

  log_dirs <- list.dirs(".", recursive = TRUE, full.names = FALSE)

  log_dirs[grepl(pattern, log_dirs)]

}

# Find all batch processing log directories

log_types <- c(

  "fastqc_batch_logs",

  "trimgalore_batch_logs",

  "bowtie2_batch_logs",

  "picard_batch_logs",

  "genomecov_batch_logs"

)

log_summary <- data.frame(

  Log_Type = c(

    "FastQC Batch Logs",

    "TrimGalore Batch Logs",

    "Bowtie2 Batch Logs",

    "Picard Batch Logs",

    "GenomeCov Batch Logs"

  ),

  Pattern = paste0(log_types, "_*"),

  Purpose = c(

    "Individual FastQC job logs, timing, and errors",

    "Individual TrimGalore job logs and statistics",

    "Individual Bowtie2 alignment logs and metrics",

    "Individual Picard deduplication logs and metrics",

    "Individual genome coverage generation logs"

  ),

  Contains = c(

    "Job logs, error logs, main batch log, PID file",

    "Job logs, error logs, main batch log, PID file",

    "Job logs, error logs, main batch log, PID file",

    "Job logs, error logs, main batch log, PID file",

    "Job logs, error logs, main batch log, PID file"

  ),

  stringsAsFactors = FALSE

)

# Format table based on output

if(output_format == "latex") {

  kable(log_summary, caption = "Batch Processing Log Structure",

        booktabs = TRUE, longtable = TRUE) %>%

    kableExtra::kable_styling(latex_options = c("striped", "repeat_header"),

                            font_size = 8)

} else {

  kable(log_summary, caption = "Batch Processing Log Structure")

}

Batch Processing Log Structure
Log_Type	Pattern	Purpose	Contains
FastQC Batch Logs	fastqc_batch_logs_*	Individual FastQC job logs, timing, and errors	Job logs, error logs, main batch log, PID file
TrimGalore Batch Logs	trimgalore_batch_logs_*	Individual TrimGalore job logs and statistics	Job logs, error logs, main batch log, PID file
Bowtie2 Batch Logs	bowtie2_batch_logs_*	Individual Bowtie2 alignment logs and metrics	Job logs, error logs, main batch log, PID file
Picard Batch Logs	picard_batch_logs_*	Individual Picard deduplication logs and metrics	Job logs, error logs, main batch log, PID file
GenomeCov Batch Logs	genomecov_batch_logs_*	Individual genome coverage generation logs	Job logs, error logs, main batch log, PID file

# Try to find actual log directories

actual_logs <- character(0)

for (pattern in log_types) {

  found <- find_log_dirs(pattern)

  if (length(found) > 0) {

    actual_logs <- c(actual_logs, found)

  }

}

Found Log Directories: - fastQC/fastQC_trimmed/fastqc_batch_logs_20260112_211938 - fastQC/fastQC_trimmed/fastqc_batch_logs_20260112_211938/individual_jobs - fastQC/fastQC_unTrimmed/fastqc_batch_logs_20260112_210944 - fastQC/fastQC_unTrimmed/fastqc_batch_logs_20260112_210944/individual_jobs - trimmedFastq/trimgalore_batch_logs_20260112_211408 - trimmedFastq/trimgalore_batch_logs_20260112_211408/individual_jobs - bams/bowtie2_batch_logs_20260112_212340 - bams/bowtie2_batch_logs_20260112_212340/individual_jobs - dedupBams/picard_batch_logs_20260112_233428 - dedupBams/picard_batch_logs_20260112_233428/individual_jobs - dedupBams/genomecov_batch_logs_20260112_231003 - dedupBams/genomecov_batch_logs_20260112_231003/individual_jobs - dedupBams/genomecov_batch_logs_20260112_234318 - dedupBams/genomecov_batch_logs_20260112_234318/individual_jobs

Performance Metrics and Summary

Pipeline Efficiency Features

efficiency_features <- data.frame(

  Feature = c(

    "Continuous Job Replacement",

    "Signal Immunity",

    "Comprehensive Logging",

    "Smart Skip Logic",

    "PID Tracking",

    "Progress Monitoring",

    "Resource Optimization"

  ),

  Description = c(

    "New jobs start immediately when others finish, maximizing CPU utilization",

    "Scripts immune to SIGHUP, SIGINT, SIGTERM - safe for remote execution",

    "Detailed logs for each job, main process, and error tracking",

    "Automatically skips files that have already been processed",

    "Tracks process IDs for job management and cleanup",

    "Regular status updates every 5 minutes during execution",

    "Parameterized job counts and thread usage for different system capacities"

  ),

  Benefit = c(

    "Reduced total processing time",

    "Prevents data loss from disconnections",

    "Easy troubleshooting and monitoring",

    "Enables pipeline restart/resume",

    "Clean termination and resource cleanup",

    "Real-time execution monitoring",

    "Optimal resource utilization"

  ),

  stringsAsFactors = FALSE

)

# Format table based on output

if(output_format == "latex") {

  kable(efficiency_features, caption = "Pipeline Efficiency and Reliability Features",

        booktabs = TRUE, longtable = TRUE) %>%

    kableExtra::kable_styling(latex_options = c("striped", "repeat_header"),

                            font_size = 8)

} else {

  kable(efficiency_features, caption = "Pipeline Efficiency and Reliability Features")

}

Pipeline Efficiency and Reliability Features
Feature	Description	Benefit
Continuous Job Replacement	New jobs start immediately when others finish, maximizing CPU utilization	Reduced total processing time
Signal Immunity	Scripts immune to SIGHUP, SIGINT, SIGTERM - safe for remote execution	Prevents data loss from disconnections
Comprehensive Logging	Detailed logs for each job, main process, and error tracking	Easy troubleshooting and monitoring
Smart Skip Logic	Automatically skips files that have already been processed	Enables pipeline restart/resume
PID Tracking	Tracks process IDs for job management and cleanup	Clean termination and resource cleanup
Progress Monitoring	Regular status updates every 5 minutes during execution	Real-time execution monitoring
Resource Optimization	Parameterized job counts and thread usage for different system capacities	Optimal resource utilization

Final Resource Requirements Summary

# Create final summary table

final_summary <- data.frame(

  Metric = c(

    "Total Pipeline Steps",

    "Core Processing Scripts",

    "Maximum Concurrent Jobs (default)",

    "Peak Thread Usage (default)",

    "Peak Memory Usage (default)",

    "Primary Output File Types"

  ),

  Value = c(

    "7 major steps",

    "8 specialized scripts",

    "16 jobs (2x FastQC steps)",

    paste0(peak_threads, " threads (", peak_threads/10, " FastQC jobs × 10 threads)"),

    paste0(peak_memory, " GB (", peak_memory/128, " Picard jobs × 128GB)"),

    "BigWig, BAM, FastQC reports, MultiQC summaries"

  ),

  stringsAsFactors = FALSE

)

# Format table based on output

if(output_format == "latex") {

  kable(final_summary, caption = "Pipeline Resource Requirements Summary",

        booktabs = TRUE, longtable = FALSE) %>%

    kableExtra::kable_styling(latex_options = c("striped", "hold_position"))

} else {

  kable(final_summary, caption = "Pipeline Resource Requirements Summary")

}

Pipeline Resource Requirements Summary
Metric	Value
Total Pipeline Steps	7 major steps
Core Processing Scripts	8 specialized scripts
Maximum Concurrent Jobs (default)	16 jobs (2x FastQC steps)
Peak Thread Usage (default)	160 threads (16 FastQC jobs × 10 threads)
Peak Memory Usage (default)	1024 GB (8 Picard jobs × 128GB)
Primary Output File Types	BigWig, BAM, FastQC reports, MultiQC summaries

Complete File Inventory

This section provides a comprehensive inventory of all files generated during the pipeline execution, organized by directory and file type.

# Function to format file sizes

format_size <- function(bytes) {

  if (is.na(bytes) || bytes == 0) return("0 B")

  units <- c("B", "KB", "MB", "GB", "TB")

  unit_index <- min(floor(log(bytes, 1024)) + 1, length(units))

  size_value <- bytes / (1024^(unit_index - 1))

  return(paste0(round(size_value, 2), " ", units[unit_index]))

}

# Function to get detailed file information with sizes (non-recursive for main files only)

get_detailed_file_info <- function(dir_path, base_path = ".") {

  full_path <- file.path(base_path, dir_path)

  

  if (!dir.exists(full_path)) {

    return(list(

      path = dir_path,

      folder_size = "0 B",

      files = data.frame(

        Filename = character(0),

        Size = character(0),

        Type = character(0),

        stringsAsFactors = FALSE

      )

    ))

  }

  # Get folder size (including subdirectories)

  folder_size_cmd <- paste0("du -sb '", full_path, "' 2>/dev/null | cut -f1")

  folder_size_result <- system(folder_size_cmd, intern = TRUE)

  folder_size_bytes <- if(length(folder_size_result) > 0) as.numeric(folder_size_result) else 0

  folder_size <- format_size(folder_size_bytes)

  

  # Get files in directory (NON-recursive to avoid log contamination)

  files <- list.files(full_path, full.names = TRUE, recursive = FALSE)

  files <- files[!dir.exists(files)] # Only files, not subdirectories

  

  if (length(files) == 0) {

    return(list(

      path = dir_path,

      folder_size = folder_size,

      files = data.frame(

        Filename = character(0),

        Size = character(0),

        Type = character(0),

        stringsAsFactors = FALSE

      )

    ))

  }

  # Get file info

  file_info <- file.info(files)

  filenames <- basename(files)

  sizes_bytes <- file_info$size

  sizes_formatted <- sapply(sizes_bytes, format_size)

  

  # Determine file types based on extensions

  get_file_type <- function(filename) {

    ext <- tools::file_ext(tolower(filename))

    if (ext == "") return("No extension")

    switch(ext,

      "fq.gz" = "FASTQ (compressed)",

      "fastq.gz" = "FASTQ (compressed)",

      "bam" = "BAM alignment",

      "bai" = "BAM index",

      "sam" = "SAM alignment",

      "bw" = "BigWig coverage",

      "bigwig" = "BigWig coverage",

      "bedgraph" = "BedGraph coverage",

      "gz" = "Compressed file",

      "html" = "HTML report",

      "zip" = "ZIP archive",

      "txt" = "Text file",

      "log" = "Log file",

      "pid" = "Process ID file",

      "json" = "JSON data",

      "csv" = "CSV data",

      "tsv" = "TSV data",

      "sizes" = "Chromosome sizes",

      paste0(toupper(ext), " file")

    )

  }

  file_types <- sapply(filenames, get_file_type)

  

  # Create data frame

  files_df <- data.frame(

    Filename = filenames,

    Size = sizes_formatted,

    Type = file_types,

    stringsAsFactors = FALSE

  )

  

  # Sort by size (convert back to numeric for sorting)

  size_numeric <- sapply(sizes_bytes, function(x) if(is.na(x)) 0 else x)

  files_df <- files_df[order(size_numeric, decreasing = TRUE), ]

  

  return(list(

    path = dir_path,

    folder_size = folder_size,

    files = files_df

  ))

}

# Define main pipeline directories to analyze (exclude log directories)

main_directories <- c(

  "fastQC/fastQC_unTrimmed",

  "fastQC/fastQC_trimmed", 

  "multiQC/multiQC_unTrimmed",

  "multiQC/multiQC_trimmed",

  "multiQC/multiQC_alignments",

  "multiQC/multiQC_deduplication",

  "trimmedFastq",

  "bams",

  "dedupBams",

  "bigwig"

)

# Get file information for all directories

all_file_info <- lapply(main_directories, get_detailed_file_info)

names(all_file_info) <- main_directories

# Generate output for each directory

for (i in seq_along(all_file_info)) {

  dir_name <- names(all_file_info)[i]

  dir_info <- all_file_info[[i]]

  

  cat("\n## Directory:", dir_name, "\n\n")

  cat("**Folder Path:** `", file.path(getwd(), dir_name), "`  \n")

  cat("**Folder Size:** ", dir_info$folder_size, "  \n\n")

  

  if (nrow(dir_info$files) > 0) {

    if(output_format == "latex") {

      print(kable(dir_info$files, caption = paste("Files in", dir_name),

                  booktabs = TRUE, longtable = FALSE) %>%

        kableExtra::kable_styling(latex_options = c("striped", "hold_position"),

                                font_size = 8))

    } else {

      print(kable(dir_info$files, caption = paste("Files in", dir_name)))

    }

  } else {

    cat("*No files found in this directory.*\n")

  }

  

  cat("\n")

}

Directory: fastQC/fastQC_unTrimmed

Folder Path: /dysk2/groupFolders/micgdu/bioinformatics/run4_Yaarob_hs_LAD/fastQC/fastQC_unTrimmed
Folder Size: 13.94 MB

No files found in this directory.

Directory: fastQC/fastQC_trimmed

Folder Path: /dysk2/groupFolders/micgdu/bioinformatics/run4_Yaarob_hs_LAD/fastQC/fastQC_trimmed
Folder Size: 13.67 MB

No files found in this directory.

Directory: multiQC/multiQC_unTrimmed

Folder Path: /dysk2/groupFolders/micgdu/bioinformatics/run4_Yaarob_hs_LAD/multiQC/multiQC_unTrimmed
Folder Size: 10 MB

Files in multiQC/multiQC_unTrimmed
	Filename	Size	Type
multiQC_unTrimmed.html	multiQC_unTrimmed.html	4.92 MB	HTML report

Directory: multiQC/multiQC_trimmed

Folder Path: /dysk2/groupFolders/micgdu/bioinformatics/run4_Yaarob_hs_LAD/multiQC/multiQC_trimmed
Folder Size: 10.02 MB

Files in multiQC/multiQC_trimmed
	Filename	Size	Type
multiQC_trimmed.html	multiQC_trimmed.html	4.91 MB	HTML report

Directory: multiQC/multiQC_alignments

Folder Path: /dysk2/groupFolders/micgdu/bioinformatics/run4_Yaarob_hs_LAD/multiQC/multiQC_alignments
Folder Size: 5.46 MB

Files in multiQC/multiQC_alignments
	Filename	Size	Type
multiQC_aligments.html	multiQC_aligments.html	4.58 MB	HTML report

Directory: multiQC/multiQC_deduplication

Folder Path: /dysk2/groupFolders/micgdu/bioinformatics/run4_Yaarob_hs_LAD/multiQC/multiQC_deduplication
Folder Size: 5.41 MB

Files in multiQC/multiQC_deduplication
	Filename	Size	Type
multiQC_deduplication.html	multiQC_deduplication.html	4.58 MB	HTML report

Directory: trimmedFastq

Folder Path: /dysk2/groupFolders/micgdu/bioinformatics/run4_Yaarob_hs_LAD/trimmedFastq
Folder Size: 18.14 GB

Directory: bams

Folder Path: /dysk2/groupFolders/micgdu/bioinformatics/run4_Yaarob_hs_LAD/bams
Folder Size: 23 GB

Directory: dedupBams

Folder Path: /dysk2/groupFolders/micgdu/bioinformatics/run4_Yaarob_hs_LAD/dedupBams
Folder Size: 14.97 GB

Directory: bigwig

Folder Path: /dysk2/groupFolders/micgdu/bioinformatics/run4_Yaarob_hs_LAD/bigwig
Folder Size: 1.35 GB

Files in bigwig
	Filename	Size	Type
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bw	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bw	290.14 MB	BigWig coverage
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bw	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bw	271.22 MB	BigWig coverage
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bw	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bw	226.98 MB	BigWig coverage
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bw	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bw	225.05 MB	BigWig coverage
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bw	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bw	188.91 MB	BigWig coverage
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bw	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bw	182.49 MB	BigWig coverage

File Type Summary

# Aggregate file information by type (only from main pipeline directories)

type_summary <- list()

for (dir_name in names(all_file_info)) {

  dir_info <- all_file_info[[dir_name]]

  if (nrow(dir_info$files) > 0) {

    for (i in 1:nrow(dir_info$files)) {

      file_type <- dir_info$files$Type[i]

      file_size_str <- dir_info$files$Size[i]

      

      # Convert size string back to bytes for aggregation

      size_parts <- strsplit(file_size_str, " ")[[1]]

      size_value <- as.numeric(size_parts[1])

      size_unit <- size_parts[2]

      size_bytes <- switch(size_unit,

        "B" = size_value,

        "KB" = size_value * 1024,

        "MB" = size_value * 1024^2,

        "GB" = size_value * 1024^3,

        "TB" = size_value * 1024^4,

        size_value

      )

      

      if (file_type %in% names(type_summary)) {

        type_summary[[file_type]]$count <- type_summary[[file_type]]$count + 1

        type_summary[[file_type]]$total_bytes <- type_summary[[file_type]]$total_bytes + size_bytes

        type_summary[[file_type]]$locations <- unique(c(type_summary[[file_type]]$locations, dir_name))

      } else {

        type_summary[[file_type]] <- list(

          count = 1,

          total_bytes = size_bytes,

          locations = dir_name

        )

      }

    }

  }

}

# Convert to data frame

if (length(type_summary) > 0) {

  type_summary_df <- data.frame(

    File_Type = names(type_summary),

    Count = sapply(type_summary, function(x) x$count),

    Total_Size = sapply(type_summary, function(x) format_size(x$total_bytes)),

    Locations = sapply(type_summary, function(x) paste(x$locations, collapse = ", ")),

    stringsAsFactors = FALSE

  )

  

  # Sort by total size (descending)

  size_bytes_for_sort <- sapply(type_summary, function(x) x$total_bytes)

  type_summary_df <- type_summary_df[order(size_bytes_for_sort, decreasing = TRUE), ]

  

  if(output_format == "latex") {

    kable(type_summary_df, caption = "File Type Summary - Pipeline Files Only",

          booktabs = TRUE, longtable = TRUE) %>%

      kableExtra::kable_styling(latex_options = c("striped", "repeat_header"),

                              font_size = 8)

  } else {

    kable(type_summary_df, caption = "File Type Summary - Pipeline Files Only")

  }

} else {

  cat("*No files found for type summary.*\n")

}

File Type Summary - Pipeline Files Only
	File_Type	Count	Total_Size	Locations
BAM alignment	BAM alignment	18	34.8 GB	bams, dedupBams
Compressed file	Compressed file	24	21.26 GB	trimmedFastq, dedupBams
BigWig coverage	BigWig coverage	6	1.35 GB	bigwig
BAM index	BAM index	24	79.81 MB	bams, dedupBams
HTML report	HTML report	4	18.99 MB	multiQC/multiQC_unTrimmed, multiQC/multiQC_trimmed, multiQC/multiQC_alignments, multiQC/multiQC_deduplication
No extension	No extension	1	2.67 MB	trimmedFastq
Text file	Text file	24	87.49 KB	trimmedFastq, bams, dedupBams
Chromosome sizes	Chromosome sizes	1	11.4 KB	dedupBams

Storage Summary

# Calculate total storage used by main pipeline directories only

total_bytes <- 0

directory_sizes <- data.frame(

  Directory = character(0),

  Size_GB = numeric(0),

  Percentage = numeric(0),

  stringsAsFactors = FALSE

)

for (dir_name in names(all_file_info)) {

  dir_info <- all_file_info[[dir_name]]

  

  # Convert folder size to bytes

  size_str <- dir_info$folder_size

  if (size_str != "0 B") {

    size_parts <- strsplit(size_str, " ")[[1]]

    size_value <- as.numeric(size_parts[1])

    size_unit <- size_parts[2]

    size_bytes <- switch(size_unit,

      "B" = size_value,

      "KB" = size_value * 1024,

      "MB" = size_value * 1024^2,

      "GB" = size_value * 1024^3,

      "TB" = size_value * 1024^4,

      size_value

    )

    

    total_bytes <- total_bytes + size_bytes

    directory_sizes <- rbind(directory_sizes, data.frame(

      Directory = dir_name,

      Size_GB = round(size_bytes / (1024^3), 3),

      Percentage = 0, # Will calculate after total

      stringsAsFactors = FALSE

    ))

  }

}

# Calculate percentages

if (nrow(directory_sizes) > 0) {

  directory_sizes$Percentage <- round((directory_sizes$Size_GB * 1024^3 / total_bytes) * 100, 1)

  directory_sizes <- directory_sizes[order(directory_sizes$Size_GB, decreasing = TRUE), ]

  

  if(output_format == "latex") {

    kable(directory_sizes, caption = "Storage Usage by Directory - Pipeline Files Only",

          booktabs = TRUE, longtable = FALSE) %>%

      kableExtra::kable_styling(latex_options = c("striped", "hold_position"))

  } else {

    kable(directory_sizes, caption = "Storage Usage by Directory - Pipeline Files Only")

  }

  

  cat("\n**Total Pipeline Storage Usage:** ", format_size(total_bytes), "\n")

} else {

  cat("*No directory size information available.*\n")

}

## 
## **Total Pipeline Storage Usage:**  57.52 GB

Conclusion

This automated DNA sequencing analysis pipeline provides a comprehensive workflow from raw FASTQ files to normalized genome coverage tracks. Key achievements include:

Automated Quality Control: Multi-stage QC with FastQC and MultiQC reporting
Robust Processing: Signal-immune batch processing with comprehensive logging
Scalable Architecture: Parameterized resource allocation for different system capacities
Production Ready: Skip logic enables restart/resume capabilities

Recommendations

Minimum system: 32 cores, 256GB RAM, 1TB+ storage
Optimal system: 64+ cores, 512GB+ RAM, fast SSD storage
Monitor disk space: Intermediate files can be 3-5x input size
Use parameter tuning based on available resources

Generated Files and Locations

All output files are organized in the specified output directory with the following structure:

Analysis Results: BAM files, coverage tracks, quality reports
Logs and Metrics: Comprehensive logging for troubleshooting and monitoring
Intermediate Files: Trimmed FASTQ files, alignment statistics, duplication metrics

Report generated automatically by the DNAfastqBigWig pipeline reporting system.

For questions about this pipeline, consult the individual script documentation and log files.

	Filename	Size	Type
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R2_001_val_2.fq.gz	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R2_001_val_2.fq.gz	2 GB	Compressed file
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz	2 GB	Compressed file
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz	1.76 GB	Compressed file
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R2_001_val_2.fq.gz	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R2_001_val_2.fq.gz	1.75 GB	Compressed file
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R2_001_val_2.fq.gz	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R2_001_val_2.fq.gz	1.52 GB	Compressed file
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz	1.52 GB	Compressed file
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R2_001_val_2.fq.gz	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R2_001_val_2.fq.gz	1.47 GB	Compressed file
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz	1.47 GB	Compressed file
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R2_001_val_2.fq.gz	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R2_001_val_2.fq.gz	1.17 GB	Compressed file
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz	1.17 GB	Compressed file
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R2_001_val_2.fq.gz	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R2_001_val_2.fq.gz	1.16 GB	Compressed file
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz	1.16 GB	Compressed file
–passthrough	–passthrough	2.67 MB	No extension
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R2_001.fastq.gz_trimming_report.txt	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R2_001.fastq.gz_trimming_report.txt	6.06 KB	Text file
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R2_001.fastq.gz_trimming_report.txt	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R2_001.fastq.gz_trimming_report.txt	6.04 KB	Text file
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R2_001.fastq.gz_trimming_report.txt	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R2_001.fastq.gz_trimming_report.txt	6.04 KB	Text file
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R2_001.fastq.gz_trimming_report.txt	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R2_001.fastq.gz_trimming_report.txt	6.01 KB	Text file
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R2_001.fastq.gz_trimming_report.txt	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R2_001.fastq.gz_trimming_report.txt	5.94 KB	Text file
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R2_001.fastq.gz_trimming_report.txt	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R2_001.fastq.gz_trimming_report.txt	5.9 KB	Text file
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001.fastq.gz_trimming_report.txt	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001.fastq.gz_trimming_report.txt	5.87 KB	Text file
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001.fastq.gz_trimming_report.txt	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001.fastq.gz_trimming_report.txt	5.86 KB	Text file
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001.fastq.gz_trimming_report.txt	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001.fastq.gz_trimming_report.txt	5.85 KB	Text file
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001.fastq.gz_trimming_report.txt	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001.fastq.gz_trimming_report.txt	5.84 KB	Text file
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001.fastq.gz_trimming_report.txt	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001.fastq.gz_trimming_report.txt	5.79 KB	Text file
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001.fastq.gz_trimming_report.txt	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001.fastq.gz_trimming_report.txt	5.75 KB	Text file

	Filename	Size	Type
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam	2.43 GB	BAM alignment
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChr.bam	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChr.bam	2.42 GB	BAM alignment
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam	2.22 GB	BAM alignment
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChr.bam	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChr.bam	2.21 GB	BAM alignment
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam	1.89 GB	BAM alignment
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChr.bam	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChr.bam	1.88 GB	BAM alignment
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam	1.88 GB	BAM alignment
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChr.bam	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChr.bam	1.87 GB	BAM alignment
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam	1.58 GB	BAM alignment
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChr.bam	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChr.bam	1.57 GB	BAM alignment
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam	1.53 GB	BAM alignment
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChr.bam	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChr.bam	1.52 GB	BAM alignment
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChr.bai	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChr.bai	2.18 MB	BAM index
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChr.bai	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChr.bai	2.17 MB	BAM index
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChr.bai	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChr.bai	2.16 MB	BAM index
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChr.bai	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChr.bai	2.13 MB	BAM index
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChr.bai	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChr.bai	2.09 MB	BAM index
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChr.bai	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChr.bai	2.04 MB	BAM index
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.txt	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.txt	643 B	Text file
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.txt	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.txt	643 B	Text file
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.txt	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.txt	643 B	Text file
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.txt	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.txt	642 B	Text file
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.txt	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.txt	639 B	Text file
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.txt	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.txt	639 B	Text file

	Filename	Size	Type
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam	2.49 GB	BAM alignment
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam	2.28 GB	BAM alignment
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam	1.93 GB	BAM alignment
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam	1.92 GB	BAM alignment
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam	1.61 GB	BAM alignment
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam	1.57 GB	BAM alignment
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bedGraph.gz	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bedGraph.gz	335.96 MB	Compressed file
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bedGraph.gz	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bedGraph.gz	335.88 MB	Compressed file
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bedGraph.gz	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bedGraph.gz	318.04 MB	Compressed file
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bedGraph.gz	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bedGraph.gz	317.44 MB	Compressed file
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bedGraph.gz	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bedGraph.gz	260.33 MB	Compressed file
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bedGraph.gz	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bedGraph.gz	260.26 MB	Compressed file
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bedGraph.gz	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bedGraph.gz	259.5 MB	Compressed file
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bedGraph.gz	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bedGraph.gz	258.75 MB	Compressed file
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bedGraph.gz	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bedGraph.gz	214.01 MB	Compressed file
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bedGraph.gz	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bedGraph.gz	213 MB	Compressed file
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bedGraph.gz	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bedGraph.gz	206.21 MB	Compressed file
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bedGraph.gz	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_Snorm.bedGraph.gz	204.9 MB	Compressed file
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bai	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bai	7.11 MB	BAM index
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bai	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bai	7.02 MB	BAM index
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bai	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bai	6.89 MB	BAM index
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bai	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bai	6.82 MB	BAM index
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bai	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bai	6.81 MB	BAM index
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bai	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bai	6.8 MB	BAM index
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bai	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bai	2.18 MB	BAM index
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_SortStChr.bam.bai	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_SortStChr.bam.bai	2.18 MB	BAM index
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_SortStChr.bam.bai	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_SortStChr.bam.bai	2.17 MB	BAM index
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bai	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bai	2.17 MB	BAM index
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_SortStChr.bam.bai	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_SortStChr.bam.bai	2.15 MB	BAM index
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bai	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bai	2.15 MB	BAM index
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_SortStChr.bam.bai	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_SortStChr.bam.bai	2.14 MB	BAM index
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bai	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bai	2.13 MB	BAM index
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_SortStChr.bam.bai	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_SortStChr.bam.bai	2.1 MB	BAM index
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bai	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bai	2.09 MB	BAM index
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_SortStChr.bam.bai	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam_SortStChr.bam.bai	2.07 MB	BAM index
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bai	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup.bam.bai	2.06 MB	BAM index
hg38.chrom.sizes	hg38.chrom.sizes	11.4 KB	Chromosome sizes
pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup_rep.txt	pADamID_NTERT_D0_LaminAC_rep1_S29_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup_rep.txt	2.13 KB	Text file
pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup_rep.txt	pADamID_NTERT_D0_LaminB1_Rep1_S28_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup_rep.txt	2.13 KB	Text file
pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup_rep.txt	pADamID_NTERT_D0_OnlyDam_Rep1_S27_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup_rep.txt	2.13 KB	Text file
pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup_rep.txt	pADamID_NTERT_D7_OnlyDam_Rep1_S30_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup_rep.txt	2.13 KB	Text file
pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup_rep.txt	pADamID_NTERT_D7_LaminAC_rep1_S32_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup_rep.txt	2.13 KB	Text file
pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup_rep.txt	pADamID_NTERT_D7_LaminB1_Rep1_S31_L008_R1_001_val_1.fq.gz.sorted_stChrH.bam_dedup_rep.txt	2.13 KB	Text file