This script provides the core analytical pipeline for a PhD thesis developing cfDNA-based biomarkers for hepatocellular carcinoma (HCC). It analyzes cfDNA fragmentomics features from whole-genome sequencing data across three cohorts: healthy controls (n=150), patients with chronic liver disease (n=186), and patients with HCC (n=139). The primary objective is to identify and validate robust biomarkers that can accurately distinguish between these clinical groups, addressing the critical need for non-invasive HCC diagnostics.
#' Install (if missing) and attach a set of packages
#'
#' For each package name, checks whether the package is available; if it is
#' not, installs it via `install.packages()` or, when `bioconductor = TRUE`,
#' via `BiocManager::install()`. The package is then attached with
#' `library()`, with warnings and startup messages suppressed.
#'
#' Side effect: when no usable CRAN mirror is configured (no `CRAN` entry in
#' `getOption("repos")`, or the `"@CRAN@"` placeholder), the `CRAN` entry is
#' set to "https://cran.r-project.org". Other configured repositories are
#' left untouched.
#'
#' @param packages Character vector of package names.
#' @param bioconductor If `TRUE`, missing packages are installed through
#'   `BiocManager::install()` (BiocManager itself is installed on demand).
#' @return `NULL`, invisibly. Called for its side effects.
install_and_load <- function(packages, bioconductor = FALSE) {
  stopifnot(is.character(packages))

  # Set a default CRAN mirror if not already set. The lookup goes through
  # names() so that a `repos` option without a CRAN entry yields NA instead
  # of making the `if` condition itself NA (which is an error).
  repos <- getOption("repos")
  cran <- if ("CRAN" %in% names(repos)) repos[["CRAN"]] else NA_character_
  if (is.na(cran) || identical(cran, "@CRAN@")) {
    repos["CRAN"] <- "https://cran.r-project.org"
    options(repos = repos)
  }

  for (package in packages) {
    # Install only when the package is not already available
    if (!requireNamespace(package, quietly = TRUE)) {
      message("Installing package: ", package)
      if (bioconductor) {
        if (!requireNamespace("BiocManager", quietly = TRUE)) {
          install.packages("BiocManager")
        }
        BiocManager::install(package, update = FALSE, ask = FALSE)
      } else {
        install.packages(package)
      }
      # install.packages() only warns on failure; fail here with a clear
      # message instead of a less specific library() error below.
      if (!requireNamespace(package, quietly = TRUE)) {
        stop(
          "Package '", package, "' could not be installed.",
          call. = FALSE
        )
      }
    }
    # Attach the package quietly
    suppressWarnings(suppressPackageStartupMessages(
      library(package, character.only = TRUE)
    ))
  }
  invisible(NULL)
}
# --- Package Installation ---
# 1. Install and load remotes first; it is needed for the GitHub
#    installations below.
install_and_load("remotes")

# 2. Install maftools from GitHub if it is not available, then attach it.
if (!requireNamespace("maftools", quietly = TRUE)) {
  message("Installing maftools from GitHub...")
  remotes::install_github("PoisonAlien/maftools", quiet = TRUE)
}
suppressWarnings(suppressPackageStartupMessages(library(maftools)))

# 3. Define the package lists. Each package is listed once; the order is the
#    attach order and therefore decides which package masks which function.
cran_packages <- c(
  "dunn.test", "PMCMRplus", "FSA", "rstatix",
  "evaluate", "knitr", "dplyr", "ggplot2", "magrittr", "grid",
  "broom.helpers", "modelsummary", "kableExtra", "tinytex", "ggplotify",
  "tidyverse", "car", "multcomp", "agricolae", "coin", "moments",
  "nortest", "reshape2", "dendextend", "viridis", "tidytext"
)
# These are installed through BiocManager::install().
# NOTE(review): most of these look like CRAN packages rather than
# Bioconductor packages (survcomp being the Bioconductor one); this relies
# on BiocManager::install() also resolving CRAN packages -- confirm.
bioc_packages <- c(
  "ggsurvfit", "gtsummary", "tidycmprsk", "ranger",
  "survminer", "survcomp", "pheatmap", "patchwork", "RColorBrewer"
)

# 4. Install and load both package lists.
install_and_load(cran_packages)
install_and_load(bioc_packages, bioconductor = TRUE)
# 5. Install TinyTeX for PDF generation if not already installed.
#    First make sure the tinytex R package is available, then the TinyTeX
#    LaTeX distribution itself.
if (!requireNamespace("tinytex", quietly = TRUE)) {
  install.packages("tinytex")
}
if (!tinytex::is_tinytex()) {
  tinytex::install_tinytex()
}

# 6. Install UHHformats from GitHub (uham-bio/UHHformats).
#    UHHformats is treated as optional (see step 7), so an installation
#    error is reported and the script continues instead of aborting.
if (!requireNamespace("UHHformats", quietly = TRUE)) {
  message("Installing UHHformats from GitHub (uham-bio/UHHformats)...")
  tryCatch(
    # build_vignettes = FALSE for a faster, non-interactive installation.
    remotes::install_github(
      "uham-bio/UHHformats",
      quiet = TRUE,
      build_vignettes = FALSE
    ),
    error = function(e) {
      message("UHHformats installation failed: ", conditionMessage(e))
    }
  )
}

# 7. Load UHHformats when it is available; otherwise warn and carry on.
if (requireNamespace("UHHformats", quietly = TRUE)) {
  suppressWarnings(suppressPackageStartupMessages(library(UHHformats)))
} else {
  warning("UHHformats package could not be installed or loaded.")
}
# --- Bibliography Generation ---
# Create the bibliography directory if it doesn't exist.
if (!dir.exists("bib")) {
  dir.create("bib")
}

# Cite every currently attached package plus the rendering packages
# (rmarkdown, knitr), which might be used without being attached.
# UHHformats needs no special case: whenever it is attached it is already
# part of .packages().
all_packages <- unique(c(.packages(), "rmarkdown", "knitr"))

# Write the .bib file
message("Writing bibliography file to bib/packages.bib...")
knitr::write_bib(x = all_packages, file = "bib/packages.bib")
message("Package setup complete.")
# Read the data
# Absolute, machine-specific location of the training-set table. Fail early
# with an actionable message when the file is not present on this machine.
training_csv <- "/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 3 (Diagnostic Indicators for HCC)/HCC_Diagnostic_Tables/Training_set_final_01062026.csv"
if (!file.exists(training_csv)) {
  stop(
    "Training set CSV not found: ", training_csv,
    "\nUpdate 'training_csv' to the location of the file on this machine.",
    call. = FALSE
  )
}
# NOTE(review): `data` shadows utils::data(); the name is kept because later
# parts of the analysis presumably refer to it.
data <- read.csv(training_csv)
# Check the structure of the data
str(data)
## 'data.frame': 475 obs. of 362 variables:
## $ Sample_ID : chr "CGPLH1000P_Healthy" "CGPLH1001P_Healthy" "CGPLH1002P_Healthy" "CGPLH1003P_Healthy" ...
## $ Group : chr "Healthy" "Healthy" "Healthy" "Healthy" ...
## $ TF_Score : num 0.00127 0.00105 0.00113 0.00156 0.00102 ...
## $ DELFI_Score : num 0.225 0.233 0.206 0.258 0.23 ...
## $ mtcfDNA_fraction: num 0.000713 0.000513 0.0011 0.000732 0.003268 ...
## $ P.1.10. : num 1.92e-06 2.76e-06 2.85e-06 2.16e-06 2.21e-06 2.56e-06 2.16e-06 2.10e-06 2.50e-06 2.72e-06 ...
## $ P.1.20. : num 2.52e-05 2.68e-05 2.46e-05 2.67e-05 3.30e-05 2.56e-05 2.13e-05 1.95e-05 2.11e-05 1.85e-05 ...
## $ P.1.30. : num 7.86e-06 1.03e-05 1.06e-05 7.79e-06 8.09e-06 9.05e-06 7.92e-06 7.20e-06 9.69e-06 9.05e-06 ...
## $ P.1.40. : num 2.57e-05 2.90e-05 2.95e-05 2.56e-05 3.20e-05 3.73e-05 2.64e-05 2.40e-05 2.58e-05 2.38e-05 ...
## $ P.1.50. : num 5.72e-05 6.03e-05 5.76e-05 5.61e-05 6.95e-05 7.63e-05 5.38e-05 4.71e-05 5.69e-05 4.95e-05 ...
## $ P.1.60. : num 0.000108 0.000106 0.000102 0.000108 0.00013 ...
## $ P.1.70. : num 0.00019 0.000181 0.000171 0.000193 0.000231 ...
## $ P.1.80. : num 0.000349 0.000326 0.000299 0.000355 0.000425 ...
## $ P.1.90. : num 0.000721 0.000859 0.000584 0.000886 0.001087 ...
## $ P.1.100. : num 0.00226 0.00244 0.00189 0.00271 0.00317 ...
## $ P.10.20. : num 2.86e-06 3.85e-06 3.97e-06 2.63e-06 2.69e-06 2.78e-06 2.47e-06 2.66e-06 3.25e-06 2.90e-06 ...
## $ P.20.30. : num 3.08e-06 3.73e-06 3.78e-06 2.99e-06 3.19e-06 3.71e-06 3.29e-06 2.43e-06 3.94e-06 3.43e-06 ...
## $ P.20.40. : num 8.12e-05 8.14e-05 7.81e-05 7.97e-05 9.62e-05 7.68e-05 6.98e-05 5.97e-05 6.08e-05 5.71e-05 ...
## $ P.30.40. : num 1.78e-05 1.87e-05 1.89e-05 1.78e-05 2.39e-05 2.83e-05 1.85e-05 1.68e-05 1.62e-05 1.48e-05 ...
## $ P.30.60. : num 1.01e-04 9.60e-05 9.12e-05 1.00e-04 1.22e-04 ...
## $ P.40.50. : num 3.15e-05 3.13e-05 2.80e-05 3.04e-05 3.75e-05 3.90e-05 2.74e-05 2.31e-05 3.10e-05 2.57e-05 ...
## $ P.40.60. : num 0.00019 0.000177 0.000176 0.000186 0.000219 ...
## $ P.40.80. : num 0.000323 0.000297 0.00027 0.000329 0.000393 ...
## $ P.50.60. : num 5.13e-05 4.60e-05 4.42e-05 5.21e-05 6.09e-05 5.85e-05 4.35e-05 3.89e-05 4.74e-05 4.54e-05 ...
## $ P.50.100. : num 0.0022 0.00238 0.00183 0.00265 0.0031 ...
## $ P.60.70. : num 8.15e-05 7.49e-05 6.96e-05 8.46e-05 1.01e-04 ...
## $ P.60.80. : num 0.000363 0.000338 0.000321 0.000365 0.00043 ...
## $ P.60.90. : num 0.000612 0.000753 0.000482 0.000778 0.000956 ...
## $ P.60.120. : num 0.0154 0.015 0.0128 0.018 0.0194 ...
## $ P.70.80. : num 0.000159 0.000145 0.000128 0.000162 0.000194 ...
## $ P.70.140. : num 0.0779 0.0814 0.0706 0.0915 0.0869 ...
## $ P.80.90. : num 0.000372 0.000533 0.000285 0.000531 0.000662 ...
## $ P.80.100. : num 0.00197 0.00218 0.00167 0.00241 0.00282 ...
## $ P.80.120. : num 0.0151 0.0147 0.0126 0.0178 0.0191 ...
## $ P.80.160. : num 0.321 0.321 0.301 0.341 0.31 ...
## $ P.90.100. : num 0.00154 0.00158 0.0013 0.00182 0.00209 ...
## $ P.90.120. : num 0.0148 0.0142 0.0123 0.0172 0.0184 ...
## $ P.90.180. : num 0.815 0.783 0.789 0.805 0.755 ...
## $ P.100.110. : num 0.0043 0.00399 0.00346 0.00494 0.00527 ...
## $ P.100.120. : num 0.013 0.0122 0.0108 0.0151 0.0159 ...
## $ P.100.150. : num 0.172 0.177 0.16 0.193 0.175 ...
## $ P.100.200. : num 0.956 0.942 0.946 0.951 0.93 ...
## $ P.110.120. : num 0.00893 0.00863 0.00754 0.01049 0.01107 ...
## $ P.120.130. : num 0.0179 0.0189 0.016 0.0215 0.0212 ...
## $ P.120.140. : num 0.0609 0.0638 0.0558 0.0713 0.0653 ...
## $ P.120.150. : num 0.159 0.164 0.149 0.177 0.159 ...
## $ P.120.160. : num 0.305 0.306 0.289 0.323 0.291 ...
## $ P.120.180. : num 0.8 0.769 0.777 0.788 0.737 ...
## $ P.130.140. : num 0.0447 0.0476 0.0419 0.0521 0.0464 ...
## $ P.140.150. : num 0.0963 0.0979 0.091 0.1036 0.0915 ...
## $ P.140.160. : num 0.236 0.23 0.222 0.242 0.216 ...
## $ P.140.210. : num 0.9 0.889 0.902 0.884 0.877 ...
## $ P.150.160. : num 0.146 0.142 0.14 0.146 0.132 ...
## $ P.150.180. : num 0.641 0.604 0.628 0.61 0.578 ...
## $ P.150.200. : num 0.784 0.765 0.786 0.759 0.755 ...
## $ P.160.170. : num 0.275 0.254 0.266 0.257 0.238 ...
## $ P.160.180. : num 0.481 0.444 0.471 0.451 0.43 ...
## $ P.160.200. : num 0.637 0.623 0.647 0.613 0.623 ...
## $ P.160.240. : num 0.675 0.672 0.693 0.653 0.68 ...
## $ P.170.180. : num 0.22 0.208 0.222 0.207 0.208 ...
## $ P.180.190. : num 0.0994 0.1079 0.1083 0.1018 0.1165 ...
## $ P.180.200. : num 0.138 0.154 0.152 0.143 0.17 ...
## $ P.180.210. : num 0.163 0.187 0.183 0.17 0.207 ...
## $ P.180.240. : num 0.18 0.21 0.205 0.188 0.234 ...
## $ P.180.270. : num 0.183 0.214 0.209 0.192 0.239 ...
## $ P.190.200. : num 0.0428 0.0528 0.0499 0.0464 0.0604 ...
## $ P.200.210. : num 0.0204 0.0267 0.0249 0.0222 0.0306 ...
## $ P.200.220. : num 0.0295 0.0379 0.0359 0.0315 0.0441 ...
## $ P.200.240. : num 0.0376 0.049 0.0465 0.04 0.0571 ...
## $ P.200.250. : num 0.039 0.0508 0.0484 0.0416 0.0594 ...
## $ P.200.300. : num 0.042 0.0557 0.0518 0.046 0.0667 ...
## $ P.210.220. : num 0.00991 0.01304 0.01238 0.01044 0.01515 ...
## $ P.210.240. : num 0.0172 0.0223 0.0215 0.0178 0.0265 ...
## $ P.210.280. : num 0.0216 0.0277 0.0269 0.0227 0.0327 ...
## $ P.220.230. : num 0.00472 0.00621 0.00604 0.00489 0.0075 ...
## $ P.220.240. : num 0.00705 0.00887 0.0088 0.00712 0.01093 ...
## $ P.230.240. : num 0.00253 0.00307 0.0031 0.00247 0.00384 ...
## $ P.240.250. : num 0.00142 0.00183 0.00192 0.0016 0.00229 ...
## $ P.240.260. : num 0.00249 0.003 0.0031 0.00266 0.00373 ...
## $ P.240.270. : num 0.00357 0.00422 0.00435 0.00379 0.00506 ...
## $ P.240.280. : num 0.0044 0.00536 0.00537 0.0049 0.00624 ...
## $ P.240.300. : num 0.0044 0.00672 0.00537 0.00606 0.00964 ...
## $ P.240.320. : num 0.0044 0.00672 0.00537 0.00606 0.00964 ...
## $ P.250.260. : num 0.00114 0.0013 0.00131 0.00115 0.00158 ...
## $ P.250.300. : num 0.00298 0.00489 0.00345 0.00445 0.00735 ...
## $ P.260.270. : num 0.000999 0.001083 0.001122 0.001045 0.001196 ...
## $ P.260.280. : num 0.002 0.00213 0.00218 0.00208 0.00229 ...
## $ P.270.280. : num 0.000833 0.001141 0.00102 0.001107 0.001177 ...
## $ P.270.300. : num 0.000833 0.002501 0.00102 0.002262 0.004578 ...
## $ P.270.360. : num 0.000833 0.002501 0.00102 0.002262 0.004578 ...
## $ P.280.290. : num 0 0.00136 0 0.00116 0.00148 ...
## $ P.280.300. : num 0.00366 0.00375 0.00361 0.0036 0.00353 ...
## $ P.280.320. : num 0 0.00136 0 0.00116 0.0034 ...
## $ P.280.350. : num 0 0.00136 0 0.00116 0.0034 ...
## $ P.290.300. : num 0 0 0 0 0.00192 ...
## $ P.300.310. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ P.300.320. : num 0.00758 0.00831 0.00767 0.00752 0.00754 ...
## $ P.300.330. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ P.300.350. : num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## Sample_ID Group TF_Score DELFI_Score mtcfDNA_fraction P.1.10.
## 1 CGPLH1000P_Healthy Healthy 0.001271 0.224852 0.000713 1.92e-06
## 2 CGPLH1001P_Healthy Healthy 0.001048 0.232668 0.000513 2.76e-06
## 3 CGPLH1002P_Healthy Healthy 0.001135 0.206212 0.001100 2.85e-06
## 4 CGPLH1003P_Healthy Healthy 0.001559 0.257690 0.000732 2.16e-06
## 5 CGPLH1004P_Healthy Healthy 0.001024 0.230369 0.003268 2.21e-06
## 6 CGPLH1005P_Healthy Healthy 0.001137 0.223513 0.000627 2.56e-06
## P.1.20. P.1.30. P.1.40. P.1.50. P.1.60. P.1.70. P.1.80.
## 1 2.52e-05 7.86e-06 2.57e-05 5.72e-05 0.00010848 0.00018996 0.00034861
## 2 2.68e-05 1.03e-05 2.90e-05 6.03e-05 0.00010637 0.00018123 0.00032618
## 3 2.46e-05 1.06e-05 2.95e-05 5.76e-05 0.00010179 0.00017138 0.00029913
## 4 2.67e-05 7.79e-06 2.56e-05 5.61e-05 0.00010820 0.00019276 0.00035508
## 5 3.30e-05 8.09e-06 3.20e-05 6.95e-05 0.00013048 0.00023127 0.00042499
## 6 2.56e-05 9.05e-06 3.73e-05 7.63e-05 0.00013486 0.00021398 0.00035623
## P.1.90. P.1.100. P.10.20. P.20.30. P.20.40. P.30.40. P.30.60. P.40.50.
## 1 0.00072056 0.00226138 2.86e-06 3.08e-06 8.12e-05 1.78e-05 0.00010062 3.15e-05
## 2 0.00085920 0.00243872 3.85e-06 3.73e-06 8.14e-05 1.87e-05 0.00009600 3.13e-05
## 3 0.00058365 0.00188779 3.97e-06 3.78e-06 7.81e-05 1.89e-05 0.00009120 2.80e-05
## 4 0.00088615 0.00270756 2.63e-06 2.99e-06 7.97e-05 1.78e-05 0.00010042 3.04e-05
## 5 0.00108656 0.00317432 2.69e-06 3.19e-06 9.62e-05 2.39e-05 0.00012239 3.75e-05
## 6 0.00082914 0.00229093 2.78e-06 3.71e-06 7.68e-05 2.83e-05 0.00012581 3.90e-05
## P.40.60. P.40.80. P.50.60. P.50.100. P.60.70. P.60.80. P.60.90.
## 1 0.00019020 0.00032294 5.13e-05 0.00220422 0.0000815 0.00036289 0.00061207
## 2 0.00017702 0.00029717 4.60e-05 0.00237837 0.0000749 0.00033776 0.00075284
## 3 0.00017594 0.00026958 4.42e-05 0.00183021 0.0000696 0.00032051 0.00048186
## 4 0.00018552 0.00032945 5.21e-05 0.00265149 0.0000846 0.00036547 0.00077794
## 5 0.00021883 0.00039295 6.09e-05 0.00310479 0.0001008 0.00042968 0.00095609
## 6 0.00017804 0.00031889 5.85e-05 0.00221458 0.0000791 0.00031872 0.00069428
## P.60.120. P.70.80. P.70.140. P.80.90. P.80.100. P.80.120. P.80.160.
## 1 0.01537820 0.00015865 0.07794972 0.00037195 0.00197408 0.01513807 0.3205178
## 2 0.01495560 0.00014495 0.08136730 0.00053302 0.00217635 0.01473579 0.3210052
## 3 0.01278487 0.00012775 0.07057450 0.00028452 0.00166980 0.01258753 0.3011379
## 4 0.01802736 0.00016232 0.09150606 0.00053107 0.00240517 0.01778048 0.3410335
## 5 0.01938781 0.00019371 0.08693940 0.00066158 0.00281820 0.01909330 0.3101904
## 6 0.01450267 0.00014225 0.07979761 0.00047291 0.00197823 0.01428130 0.3088499
## P.90.100. P.90.120. P.90.180. P.100.110. P.100.120. P.100.150. P.100.200.
## 1 0.00154083 0.01476612 0.8150515 0.00429833 0.01297260 0.1722271 0.9557386
## 2 0.00157952 0.01420276 0.7827151 0.00399373 0.01223760 0.1770198 0.9418744
## 3 0.00130415 0.01230300 0.7893111 0.00346127 0.01077330 0.1598179 0.9462750
## 4 0.00182141 0.01724941 0.8048547 0.00494235 0.01506375 0.1925962 0.9512647
## 5 0.00208776 0.01843172 0.7552904 0.00527143 0.01594124 0.1754659 0.9300836
## 6 0.00146179 0.01380840 0.7618336 0.00372940 0.01195419 0.1719612 0.9334588
## P.110.120. P.120.130. P.120.140. P.120.150. P.120.160. P.120.180. P.130.140.
## 1 0.00892697 0.01792847 0.06089326 0.1590018 0.3053798 0.8002853 0.04472454
## 2 0.00862952 0.01887180 0.06377190 0.1643966 0.3062694 0.7685123 0.04761476
## 3 0.00753759 0.01597703 0.05579454 0.1488190 0.2885504 0.7770081 0.04188220
## 4 0.01048565 0.02149663 0.07131890 0.1771682 0.3232530 0.7876053 0.05206662
## 5 0.01107253 0.02122286 0.06533301 0.1591220 0.2910971 0.7368586 0.04642953
## 6 0.00861721 0.01940487 0.06273912 0.1596146 0.2945686 0.7480252 0.04596919
## P.140.150. P.140.160. P.140.210. P.150.160. P.150.180. P.150.200. P.160.170.
## 1 0.09634883 0.2357995 0.9002941 0.1463779 0.6412835 0.7835115 0.2751536
## 2 0.09791000 0.2300145 0.8894173 0.1418729 0.6041157 0.7648546 0.2538522
## 3 0.09095976 0.2222594 0.9023600 0.1397314 0.6281891 0.7864572 0.2661824
## 4 0.10360499 0.2421560 0.8844456 0.1460848 0.6104370 0.7586685 0.2569860
## 5 0.09146957 0.2156335 0.8767035 0.1319751 0.5777367 0.7546177 0.2376068
## 6 0.09424053 0.2200718 0.8855377 0.1349540 0.5884106 0.7614976 0.2419553
## P.160.180. P.160.200. P.160.240. P.170.180. P.180.190. P.180.200. P.180.210.
## 1 0.4813707 0.6371336 0.6747355 0.2197520 0.09943374 0.1379590 0.1626618
## 2 0.4438643 0.6229817 0.6719493 0.2083907 0.10791691 0.1538091 0.1873916
## 3 0.4709955 0.6467258 0.6931947 0.2222753 0.10832770 0.1521923 0.1832111
## 4 0.4505759 0.6125837 0.6525562 0.2073662 0.10180785 0.1433754 0.1704035
## 5 0.4303480 0.6226425 0.6797481 0.2081548 0.11648551 0.1704273 0.2074972
## 6 0.4357395 0.6265435 0.6824650 0.2115013 0.11524121 0.1657891 0.2028865
## P.180.240. P.180.270. P.190.200. P.200.210. P.200.220. P.200.240. P.200.250.
## 1 0.1798299 0.1833952 0.04279428 0.02043379 0.02946796 0.03760186 0.03902489
## 2 0.2097064 0.2139251 0.05282196 0.02665271 0.03794541 0.04896758 0.05079826
## 3 0.2047370 0.2090856 0.04994038 0.02494302 0.03587537 0.04646890 0.04838731
## 4 0.1882039 0.1919971 0.04642362 0.02217208 0.03152955 0.03997246 0.04157358
## 5 0.2339866 0.2390447 0.06039549 0.03061622 0.04409517 0.05710556 0.05939157
## 6 0.2290084 0.2343700 0.05784576 0.02979956 0.04281935 0.05592144 0.05823635
## P.200.300. P.210.220. P.210.240. P.210.280. P.220.230. P.220.240. P.230.240.
## 1 0.04199998 0.00991246 0.01716807 0.02156618 0.00472396 0.00705261 0.00253164
## 2 0.05568687 0.01304123 0.02231487 0.02767407 0.00620512 0.00887315 0.00306851
## 3 0.05183716 0.01238131 0.02152589 0.02689415 0.00603965 0.00879559 0.00310493
## 4 0.04602770 0.01043997 0.01780038 0.02270057 0.00489064 0.00712120 0.00246977
## 5 0.06674208 0.01515415 0.02648934 0.03272438 0.00749696 0.01093269 0.00383823
## 6 0.06425028 0.01496026 0.02612189 0.03287465 0.00738315 0.01067695 0.00377848
## P.240.250. P.240.260. P.240.270. P.240.280. P.240.300. P.240.320. P.250.260.
## 1 0.00142303 0.00248932 0.00356536 0.00439812 0.00439812 0.00439812 0.00114338
## 2 0.00183069 0.00300115 0.00421866 0.00535920 0.00671929 0.00671929 0.00130496
## 3 0.00191841 0.00310136 0.00434861 0.00536826 0.00536826 0.00536826 0.00130781
## 4 0.00160112 0.00266020 0.00379321 0.00490019 0.00605525 0.00605525 0.00114722
## 5 0.00228601 0.00372674 0.00505813 0.00623504 0.00963652 0.00963652 0.00157565
## 6 0.00231491 0.00383380 0.00536159 0.00675276 0.00832884 0.00832884 0.00168950
## P.250.300. P.260.270. P.260.280. P.270.280. P.270.300. P.270.360. P.280.290.
## 1 0.00297509 0.00099895 0.00199601 0.00083276 0.00083276 0.00083276 0.00000000
## 2 0.00488861 0.00108301 0.00212631 0.00114055 0.00250064 0.00250064 0.00136009
## 3 0.00344985 0.00112239 0.00217718 0.00101965 0.00101965 0.00101965 0.00000000
## 4 0.00445412 0.00104487 0.00207949 0.00110698 0.00226204 0.00226204 0.00115506
## 5 0.00735051 0.00119647 0.00228592 0.00117691 0.00457839 0.00457839 0.00148196
## 6 0.00601393 0.00135718 0.00262894 0.00139118 0.00296726 0.00296726 0.00157608
## P.280.300. P.280.320. P.280.350. P.290.300. P.300.310. P.300.320. P.300.330.
## 1 0.00365540 0.00000000 0.00000000 0.00000000 0 0.00757653 0
## 2 0.00374697 0.00136009 0.00136009 0.00000000 0 0.00830685 0
## 3 0.00361225 0.00000000 0.00000000 0.00000000 0 0.00766506 0
## 4 0.00360190 0.00115506 0.00115506 0.00000000 0 0.00751969 0
## 5 0.00352748 0.00340148 0.00340148 0.00191952 0 0.00753975 0
## 6 0.00413155 0.00157608 0.00157608 0.00000000 0 0.00826162 0
## P.300.350. P.300.360. P.300.400. P.310.320. P.320.330. P.320.340. P.320.360.
## 1 0 0 0 0 0 0.00771922 0
## 2 0 0 0 0 0 0.01052846 0
## 3 0 0 0 0 0 0.00890304 0
## 4 0 0 0 0 0 0.00874517 0
## 5 0 0 0 0 0 0.00919688 0
## 6 0 0 0 0 0 0.01002904 0
## P.320.400. AACC ACCC CCCT CTAA TAAC
## 1 0 0.003143340 0.003796098 0.01160814 0.003353300 0.002407976
## 2 0 0.003270072 0.003847939 0.01147993 0.003222585 0.002438626
## 3 0 0.003227095 0.003806106 0.01159249 0.003332356 0.002349842
## 4 0 0.003214644 0.003820024 0.01137570 0.003280539 0.002496725
## 5 0 0.003312022 0.003976018 0.01182085 0.003274980 0.002498459
## 6 0 0.003258333 0.004091867 0.01236269 0.002900330 0.002368610
## CCCC CCTA AGCC ATCC CCTC AAAC
## 1 0.008116026 0.007619187 0.003965200 0.000682021 0.01057766 0.005357880
## 2 0.007779957 0.008131660 0.003893866 0.000720632 0.01044118 0.005669073
## 3 0.007786994 0.007705751 0.003970617 0.000731007 0.01029219 0.005507635
## 4 0.007704763 0.007862202 0.003879691 0.000696311 0.01027094 0.005543094
## 5 0.007843958 0.007937819 0.003893364 0.000730640 0.01029901 0.005559666
## 6 0.008698478 0.008276515 0.004015813 0.000647489 0.01134925 0.005377047
## TACC CCCA GGCG TCCC CCTG TGTA
## 1 0.002221607 0.01599779 0.000982856 0.002685428 0.01499361 0.005243176
## 2 0.002174759 0.01603900 0.000960930 0.002484062 0.01466085 0.005191527
## 3 0.002135104 0.01580311 0.001007948 0.002490876 0.01456118 0.005199914
## 4 0.002285280 0.01590348 0.000929112 0.002594528 0.01441496 0.005187107
## 5 0.002325028 0.01606972 0.000819623 0.002621185 0.01475627 0.005544642
## 6 0.002287620 0.01725183 0.000985393 0.002819663 0.01623076 0.005298354
## GCCA CACC TGTG CTGC GAGC GCAC
## 1 0.006634208 0.005338965 0.006685072 0.002419808 0.002377139 0.003512217
## 2 0.006685120 0.005174561 0.006295583 0.002036618 0.002244544 0.003399065
## 3 0.006727032 0.005280163 0.006392916 0.002259609 0.002324746 0.003412470
## 4 0.006392771 0.005251900 0.006360705 0.002154120 0.002310018 0.003368372
## 5 0.006260472 0.005278301 0.006810724 0.002084326 0.002194663 0.003315070
## 6 0.007091456 0.005495741 0.006615800 0.001996861 0.002335464 0.003692086
## CACT GAGG AGTG GGCC CCAG ACAG
## 1 0.006860647 0.004493395 0.004860461 0.004245889 0.01352731 0.006097119
## 2 0.006919334 0.004213183 0.005063661 0.004012020 0.01283922 0.006252359
## 3 0.006992908 0.004370662 0.005029584 0.004250947 0.01287564 0.006022268
## 4 0.006969073 0.004330431 0.004959480 0.003886799 0.01289435 0.006255211
## 5 0.007092915 0.004159884 0.004863522 0.003843969 0.01320733 0.006250288
## 6 0.006986652 0.004413781 0.004766078 0.004425069 0.01409552 0.006310755
## AGAG CTAG ACGG CATC AGTC TCCA
## 1 0.006745735 0.002222899 0.000584211 0.004747910 0.003175111 0.003421416
## 2 0.006632864 0.001989073 0.000557534 0.005109270 0.003304134 0.003323094
## 3 0.006739833 0.002138697 0.000562921 0.004951090 0.003284710 0.003282551
## 4 0.006654320 0.002050073 0.000577874 0.004841137 0.003231692 0.003406965
## 5 0.006625299 0.002025105 0.000558697 0.004825387 0.003224598 0.003496408
## 6 0.006412724 0.001764874 0.000598712 0.005058327 0.003202414 0.003517060
## GTCA CCTT CCAA GCCT GGGC CGCT
## 1 0.002818883 0.01250933 0.01106532 0.007529410 0.003123356 0.001046355
## 2 0.002889293 0.01304993 0.01126807 0.007200240 0.002885058 0.000954990
## 3 0.002935296 0.01271220 0.01083966 0.007386966 0.003017113 0.001033803
## 4 0.002797622 0.01271180 0.01115337 0.007094603 0.002858169 0.000978321
## 5 0.002730303 0.01304532 0.01128002 0.007273356 0.002703266 0.001016345
## 6 0.002793785 0.01336671 0.01164134 0.008093395 0.003097215 0.001066400
## CTGG GCCC GTGC CATT TCTG AGGG
## 1 0.003139391 0.004462544 0.002414671 0.01046451 0.003878363 0.003918849
## 2 0.002461517 0.004069508 0.002291887 0.01142773 0.003670433 0.003677182
## 3 0.002811702 0.004204431 0.002403493 0.01134326 0.003564955 0.003847731
## 4 0.002716803 0.004060421 0.002302470 0.01101025 0.003882286 0.003759980
## 5 0.002592534 0.004162305 0.002253149 0.01140778 0.003832350 0.003692555
## 6 0.002355941 0.004819395 0.002376794 0.01076693 0.003982810 0.003734851
## TAGA GGTG AACG TGAG AAGA CAGA
## 1 0.003380409 0.004947759 0.000754279 0.006734704 0.006303620 0.008585118
## 2 0.003164794 0.005185978 0.000780302 0.005915570 0.006444331 0.008039474
## 3 0.003238733 0.005253785 0.000794795 0.006183001 0.006347884 0.008313586
## 4 0.003537747 0.004854678 0.000794760 0.006420371 0.006457486 0.008403701
## 5 0.003278817 0.004381904 0.000784392 0.006389779 0.006523550 0.008210345
## 6 0.003051812 0.004990204 0.000790697 0.006184864 0.006216575 0.007946598
## AAGG GACA GCGC CGTT CAGC CAGT
## 1 0.004485928 0.004365255 0.000469641 0.002548662 0.005870412 0.005770747
## 2 0.004442671 0.004629845 0.000387803 0.002488599 0.005343859 0.005469051
## 3 0.004489450 0.004608153 0.000410958 0.002615870 0.005562922 0.005639730
## 4 0.004517059 0.004501993 0.000407429 0.002508818 0.005538345 0.005656090
## 5 0.004491022 0.004213852 0.000383742 0.002470399 0.005464605 0.005732111
## 6 0.004422970 0.004411018 0.000452471 0.002454553 0.005650255 0.005510834
## AATA GGAG CGAA GTGA CCAC GCTG
## 1 0.005788816 0.006804987 0.001601526 0.003861304 0.008828200 0.007389971
## 2 0.006378635 0.006759053 0.001735538 0.003818762 0.008715770 0.007072330
## 3 0.006024933 0.006990716 0.001660663 0.003892106 0.008596243 0.007155379
## 4 0.006295266 0.006531228 0.001670716 0.003766459 0.008536025 0.006997648
## 5 0.006391374 0.006142783 0.001611851 0.003663491 0.008647679 0.007010063
## 6 0.005864763 0.006809598 0.001933691 0.003747252 0.009290819 0.008023011
## TGCC AAGC AGAA AAAA GAAG GCTC
## 1 0.003279705 0.003344748 0.008898632 0.01537159 0.005572843 0.003854103
## 2 0.002931953 0.003308426 0.009042588 0.01607889 0.005705461 0.003703425
## 3 0.003037386 0.003313095 0.009011738 0.01553056 0.005734500 0.003758864
## 4 0.003117092 0.003356844 0.009109480 0.01611030 0.005626996 0.003601047
## 5 0.003211420 0.003331475 0.009042573 0.01640061 0.005396850 0.003627256
## 6 0.003270949 0.003290905 0.008527956 0.01533631 0.005594479 0.004289883
## TGGG GGCA AATG CCGG GGAT AACA
## 1 0.004933848 0.004251382 0.006137902 0.000873526 0.004451142 0.006032596
## 2 0.003932913 0.004584352 0.006716061 0.000716745 0.004675830 0.006387248
## 3 0.004268842 0.004531564 0.006496052 0.000781926 0.004759492 0.006243213
## 4 0.004637367 0.004345733 0.006742562 0.000768795 0.004434730 0.006304435
## 5 0.004293566 0.003777612 0.006551197 0.000754110 0.004234883 0.006400178
## 6 0.004206793 0.004139011 0.006614974 0.000843853 0.004436537 0.006098885
## CATG CCAT GGGT CTGA CAGG TATT
## 1 0.007569391 0.01128457 0.003155915 0.003912688 0.008035380 0.005848722
## 2 0.007712974 0.01187048 0.003029124 0.003478287 0.007206974 0.006243717
## 3 0.007687215 0.01169651 0.003155853 0.003744562 0.007630410 0.006085179
## 4 0.007624526 0.01129186 0.003009050 0.003667366 0.007531582 0.006206820
## 5 0.007697475 0.01194304 0.002939198 0.003544548 0.007410821 0.006552649
## 6 0.007713516 0.01195988 0.003111051 0.003300991 0.007547839 0.006084116
## GAGA GAAC GGCT GGGA AGAC GCAG
## 1 0.005167785 0.002494101 0.005539112 0.004814155 0.003923169 0.006299507
## 2 0.005152465 0.002712653 0.005647251 0.004797452 0.003990964 0.005873343
## 3 0.005211164 0.002677094 0.005841650 0.004886257 0.003973940 0.005930281
## 4 0.005180061 0.002573329 0.005347880 0.004711644 0.003940600 0.006087093
## 5 0.004961919 0.002383979 0.005169021 0.004326054 0.003890244 0.005862924
## 6 0.005163885 0.002573463 0.005771821 0.004711971 0.003840032 0.006476970
## CCGC CGAG TTTC ATTC TGCA AAAT
## 1 0.000780988 0.001698712 0.002097520 0.004608946 0.004911830 0.009192594
## 2 0.000663275 0.001557858 0.001966919 0.005006217 0.004635438 0.009945799
## 3 0.000707412 0.001656210 0.002008019 0.004944647 0.004786225 0.009544890
## 4 0.000684972 0.001561495 0.001981427 0.004724131 0.004824907 0.009840248
## 5 0.000676726 0.001605970 0.002014724 0.004918853 0.004921363 0.010060388
## 6 0.000768588 0.001746318 0.001911366 0.004648783 0.004816614 0.009305355
## ACAA GAAA AGCA CAAA GCTA CGCC
## 1 0.006171337 0.009231818 0.004941999 0.01255090 0.004053121 0.001372325
## 2 0.006592761 0.009595945 0.005133476 0.01278388 0.004324733 0.001209696
## 3 0.006243989 0.009501882 0.005102344 0.01253070 0.004200940 0.001293523
## 4 0.006473716 0.009570788 0.005064974 0.01293974 0.004098873 0.001246522
## 5 0.006604250 0.009334410 0.004909440 0.01279444 0.004015445 0.001251793
## 6 0.006419005 0.009337729 0.004796083 0.01231372 0.004400304 0.001417351
## ACCA CCCG TCTC CAAC ACTA GCGG
## 1 0.005080725 0.002558713 0.004093697 0.004593921 0.003474833 0.000541721
## 2 0.005531871 0.002353609 0.003950371 0.004703822 0.003940544 0.000458932
## 3 0.005282322 0.002456233 0.003846770 0.004657570 0.003678801 0.000489553
## 4 0.005406855 0.002390051 0.003987497 0.004627994 0.003819568 0.000479850
## 5 0.005290564 0.002400080 0.004031402 0.004614537 0.003729472 0.000456321
## 6 0.005317161 0.002739068 0.004370855 0.004564186 0.003614075 0.000531917
## TGCT GAGT CGTG AGGC TAGC CTCC
## 1 0.004325941 0.003243480 0.002110406 0.003842968 0.001805963 0.002159489
## 2 0.004062166 0.003199101 0.001972093 0.003630330 0.001673354 0.001993856
## 3 0.004259482 0.003272380 0.002070609 0.003752337 0.001685743 0.002144330
## 4 0.004174895 0.003256257 0.001947095 0.003688031 0.001852704 0.002020790
## 5 0.004510995 0.003161684 0.002028960 0.003627884 0.001702574 0.002017395
## 6 0.004353663 0.003251747 0.002182206 0.003715039 0.001645785 0.001915673
## TCCT ACTG CAAG AAGT CGCA TAAT
## 1 0.003568604 0.005243309 0.007984219 0.003997788 0.001026445 0.004573285
## 2 0.003372489 0.005609996 0.007733004 0.004044608 0.000968979 0.004696495
## 3 0.003351195 0.005423103 0.007838908 0.004001128 0.001014576 0.004560540
## 4 0.003523876 0.005578275 0.007859608 0.004078271 0.000965456 0.004872795
## 5 0.003807285 0.005541325 0.007854877 0.004148732 0.000972704 0.005070965
## 6 0.003721141 0.005561190 0.007730224 0.003914526 0.001058307 0.004602698
## GTTT TGAT CTAT TAGG TGTT AGAT
## 1 0.006804869 0.005122993 0.002581428 0.002513891 0.006762155 0.004930196
## 2 0.007163880 0.004912789 0.002579209 0.002274791 0.006630292 0.005145254
## 3 0.007267858 0.005027679 0.002694044 0.002375951 0.006805324 0.005074701
## 4 0.006817075 0.005012117 0.002571599 0.002517138 0.006435092 0.005081550
## 5 0.007155040 0.005347042 0.002652436 0.002315740 0.007359453 0.005120773
## 6 0.006923698 0.004932652 0.002270303 0.002179389 0.006792262 0.004786393
## GGTA GTTG GTGG CTCA CTCT GCAT
## 1 0.002870060 0.003376564 0.003566437 0.003094198 0.003005772 0.005038442
## 2 0.003267485 0.003474703 0.003299379 0.002995279 0.002939974 0.005116698
## 3 0.003148685 0.003504777 0.003448141 0.003177830 0.003165484 0.005095109
## 4 0.003022386 0.003325375 0.003382133 0.003027215 0.002953976 0.004974562
## 5 0.002662297 0.003288373 0.003290668 0.002925554 0.003027519 0.004999535
## 6 0.002761195 0.003375233 0.003406279 0.002669330 0.002699300 0.005241681
## GCAA GTTA ACTC CTTT GATG ATGG
## 1 0.005440724 0.002943015 0.004358262 0.006245362 0.003388352 0.004121460
## 2 0.005410740 0.003249334 0.004681451 0.006543135 0.003652007 0.004176217
## 3 0.005303607 0.003161048 0.004520534 0.006687937 0.003617392 0.004168506
## 4 0.005406147 0.003056497 0.004627696 0.006318227 0.003450826 0.004197746
## 5 0.005343133 0.002993722 0.004616205 0.006716541 0.003263003 0.004076584
## 6 0.005668901 0.002966407 0.004668901 0.006188615 0.003516342 0.004084061
## ACAT CTTA ACCT ACAC TCTA ATAA
## 1 0.005855655 0.002969234 0.002813078 0.004193852 0.002857366 0.005663778
## 2 0.006451875 0.003194448 0.003164432 0.004411904 0.002929612 0.005981439
## 3 0.006128811 0.003155583 0.003017956 0.004255299 0.002717927 0.005753238
## 4 0.006252238 0.003046275 0.003132394 0.004321986 0.002908809 0.005880477
## 5 0.006392468 0.003103288 0.003324196 0.004322701 0.003021261 0.006047005
## 6 0.006090774 0.002898769 0.003164602 0.004342751 0.003071032 0.005362798
## ATTA GCTT TCTT GGAA GGAC TTTT
## 1 0.004417292 0.006258664 0.004927835 0.008536094 0.002799435 0.004319542
## 2 0.004802583 0.006384170 0.004928078 0.008946009 0.002894148 0.004151216
## 3 0.004590292 0.006404351 0.004755073 0.009092054 0.002931029 0.004150173
## 4 0.004651993 0.006191502 0.004919361 0.008771971 0.002763495 0.004236433
## 5 0.004743898 0.006371150 0.005252016 0.008266213 0.002542547 0.004380218
## 6 0.004346556 0.006788260 0.005233443 0.008831690 0.002844446 0.003957291
## ATTG TCAT TGAA AGGA ATAT ATGA
## 1 0.003410815 0.003462585 0.007052969 0.005396273 0.001468234 0.004358856
## 2 0.003641516 0.003463807 0.006487209 0.005243061 0.001615794 0.004504147
## 3 0.003536992 0.003275265 0.006565186 0.005330356 0.001570025 0.004406081
## 4 0.003532023 0.003488476 0.007023487 0.005361357 0.001596078 0.004431255
## 5 0.003521527 0.003661071 0.006983456 0.005220604 0.001602779 0.004426046
## 6 0.003338839 0.003686720 0.006574793 0.005117297 0.001264129 0.004168622
## TTAT GGGG GAAT ACTT CAAT AAAG
## 1 0.001884501 0.003891977 0.007088838 0.005756035 0.006839506 0.007868787
## 2 0.001683528 0.003620677 0.008070227 0.006459737 0.007241238 0.008088941
## 3 0.001756647 0.003809209 0.007980009 0.006153975 0.007079549 0.007985002
## 4 0.001788964 0.003609932 0.007957836 0.006370329 0.007171097 0.008081400
## 5 0.001832920 0.003408374 0.007465953 0.006501640 0.007291196 0.008188220
## 6 0.001504532 0.003772753 0.007980372 0.006167098 0.006893226 0.007762598
## TTTA TAAA ATGT TGGC TGAC CGGT
## 1 0.002411895 0.007073487 0.000852236 0.003108970 0.003063718 0.000777142
## 2 0.002372222 0.006915808 0.000852732 0.002516794 0.002814146 0.000706340
## 3 0.002373151 0.006773970 0.000867622 0.002580705 0.002834471 0.000743252
## 4 0.002399141 0.007473299 0.000857021 0.003117674 0.002970048 0.000725689
## 5 0.002478647 0.007416666 0.000897491 0.002863841 0.003010378 0.000717014
## 6 0.002256466 0.006744598 0.000711161 0.002816953 0.002951494 0.000755326
## GTAA CGGA AACT CTTG AGGT TATG
## 1 0.003434688 0.000932764 0.001593925 0.002965998 0.003529409 0.003608111
## 2 0.003576210 0.000850709 0.001798907 0.002914236 0.003419398 0.003684407
## 3 0.003547938 0.000902080 0.001733929 0.003038415 0.003488873 0.003518017
## 4 0.003477027 0.000883568 0.001809645 0.002861472 0.003461363 0.003760027
## 5 0.003447711 0.000873376 0.001887264 0.002906155 0.003446528 0.003909733
## 6 0.003387975 0.000928468 0.001701560 0.002768320 0.003368180 0.003671520
## AGTT GATT GGTT TACA GACC AATC
## 1 0.004926187 0.003791570 0.005096551 0.004399461 0.002212877 0.002817636
## 2 0.005298467 0.004195235 0.005350579 0.004393435 0.002190193 0.003135515
## 3 0.005220761 0.004219526 0.005533558 0.004309540 0.002231612 0.003013841
## 4 0.005191180 0.004010253 0.005006958 0.004617017 0.002138347 0.003025705
## 5 0.005297593 0.003946578 0.005075573 0.004729375 0.002171606 0.002954958
## 6 0.004907671 0.003997041 0.005246849 0.004477059 0.002376758 0.003027746
## TATA CACA TATC TCAC AATT GTGT
## 1 0.004613623 0.009214834 0.002801572 0.002501613 0.006263356 0.003720202
## 2 0.004940174 0.009294475 0.002928355 0.002391891 0.006918467 0.003716114
## 3 0.004665784 0.009290449 0.002805950 0.002318472 0.006598176 0.003784602
## 4 0.004947386 0.009417611 0.002852602 0.002516383 0.006797464 0.003675182
## 5 0.005246691 0.009327507 0.002948127 0.002436781 0.006945589 0.003606494
## 6 0.004891197 0.009197249 0.002884825 0.002564024 0.006410678 0.003569982
## AGTA CCGT TAAG TTAA CTGT GTTC
## 1 0.003639689 0.000793340 0.003465896 0.002201460 0.003017456 0.002446695
## 2 0.003923823 0.000710603 0.003317025 0.001881063 0.002687595 0.002508802
## 3 0.003771868 0.000763289 0.003297005 0.001942089 0.002947524 0.002568898
## 4 0.003833282 0.000724855 0.003573447 0.002099722 0.002813223 0.002373617
## 5 0.003792583 0.000759291 0.003464440 0.002026647 0.002886038 0.002427799
## 6 0.003539815 0.000799796 0.003210256 0.001674390 0.002531685 0.002520738
## CCGA GATA AGCG CGGG CATA AGCT
## 1 0.000815625 0.002538328 0.000681931 0.001455318 0.006407486 0.001709179
## 2 0.000720444 0.002872139 0.000661512 0.001264019 0.006941632 0.001887176
## 3 0.000783107 0.002760463 0.000690141 0.001369589 0.006593403 0.001869582
## 4 0.000749216 0.002735455 0.000677675 0.001311929 0.006819576 0.001857045
## 5 0.000764509 0.002534927 0.000644185 0.001288028 0.006842890 0.001869783
## 6 0.000821403 0.002565352 0.000654380 0.001421784 0.006536425 0.001716653
## TGCG ATAG ATGC TTGC TGTC TCAA
## 1 0.000538098 0.003205443 0.002823308 0.000860461 0.004168954 0.002978721
## 2 0.000472950 0.003298570 0.002795604 0.000710892 0.004003985 0.002820099
## 3 0.000515105 0.003231868 0.002821838 0.000745849 0.004095240 0.002686252
## 4 0.000495797 0.003257405 0.002805501 0.000822769 0.003930881 0.003067695
## 5 0.000494671 0.003240627 0.002783661 0.000734387 0.004233251 0.002936258
## 6 0.000505034 0.002960216 0.002700987 0.000658849 0.004228741 0.002908047
## GGTC TTCT TTTG GACT TTGT CACG
## 1 0.002783281 0.001367129 0.001797337 0.003526529 0.001388775 0.001412218
## 2 0.002856502 0.001184883 0.001611314 0.003687587 0.001172469 0.001317590
## 3 0.002935903 0.001281278 0.001657964 0.003724086 0.001229382 0.001386843
## 4 0.002616561 0.001266039 0.001696995 0.003682149 0.001306708 0.001372367
## 5 0.002539122 0.001284640 0.001648875 0.003583473 0.001319853 0.001342570
## 6 0.002919873 0.001067585 0.001471332 0.003720333 0.001089515 0.001416507
## CGTA CTTC ATTT CGTC GTAT TTGA
## 1 0.001143585 0.003182208 0.002335332 0.001130119 0.003120135 0.001778750
## 2 0.001148032 0.003212093 0.002808192 0.001070168 0.003326794 0.001518435
## 3 0.001149521 0.003322338 0.002718331 0.001111724 0.003276917 0.001592406
## 4 0.001145668 0.003123367 0.002668554 0.001036415 0.003198367 0.001710740
## 5 0.001163276 0.003180815 0.002815791 0.001067300 0.003165395 0.001555911
## 6 0.001172927 0.003076631 0.002409312 0.001176929 0.003006929 0.001420868
## TGGT GTAC TTAG TCGT CTAC TTGG
## 1 0.003090664 0.001526447 0.001202718 0.000187111 0.001599611 0.001142130
## 2 0.002531780 0.001625693 0.000949398 0.000154514 0.001548595 0.000848209
## 3 0.002615803 0.001625042 0.001018303 0.000160008 0.001597483 0.000939994
## 4 0.003127173 0.001560127 0.001120238 0.000184048 0.001532149 0.001066405
## 5 0.002994026 0.001527206 0.000969603 0.000175385 0.001508470 0.000838862
## 6 0.002695639 0.001516879 0.000790249 0.000170755 0.001379862 0.000706351
## ACGC CGCG TCAG ATAC TGGA GTAG
## 1 0.000441818 0.000303092 0.003048025 0.002594047 0.005320793 0.002728082
## 2 0.000404205 0.000261989 0.002736354 0.002725212 0.004508598 0.002706642
## 3 0.000424029 0.000286486 0.002702882 0.002646077 0.004725237 0.002772438
## 4 0.000422558 0.000266833 0.003089398 0.002663930 0.005365446 0.002662121
## 5 0.000414438 0.000259259 0.002765427 0.002643597 0.004909888 0.002600244
## 6 0.000448325 0.000308867 0.002775498 0.002426253 0.004697991 0.002633474
## TACT ACGT GATC ATCT CGGC CGAC
## 1 0.003054157 0.000659276 0.002869213 0.000989329 0.001023625 0.000602636
## 2 0.003084068 0.000643794 0.001605692 0.001123306 0.000894193 0.000567534
## 3 0.003028042 0.000650320 0.001308787 0.001138895 0.000943604 0.000586482
## 4 0.003205523 0.000650720 0.001102860 0.001105266 0.000919833 0.000577890
## 5 0.003376424 0.000652091 0.000986044 0.001180219 0.000891574 0.000578438
## 6 0.003171044 0.000659154 0.001108807 0.000939613 0.001025609 0.000623746
## CGAT ATCA ACCG TTCA GTCC GCCG
## 1 0.001001177 0.000798477 0.000658355 0.001315582 0.002078130 0.001028569
## 2 0.001040268 0.000919830 0.000682221 0.001148740 0.002017904 0.000950655
## 3 0.001057365 0.000883426 0.000685402 0.001239434 0.002088824 0.000972090
## 4 0.000929096 0.000860985 0.000687504 0.001227053 0.001983518 0.000944461
## 5 0.001032266 0.000816217 0.000667170 0.001213424 0.002012393 0.000899158
## 6 0.001075714 0.000724620 0.000700501 0.001020656 0.002107479 0.001115142
## TAGT ACGA GCGT CTCG TCCG TCGG
## 1 0.001897017 0.000445203 0.000541825 0.000452151 0.000314836 1.449760e-04
## 2 0.001755741 0.000429105 0.000486679 0.000405621 0.000270559 9.666526e-05
## 3 0.001751655 0.000433761 0.000510703 0.000462281 0.000290551 1.147230e-04
## 4 0.002029047 0.000443553 0.000492259 0.000428754 0.000302596 1.258440e-04
## 5 0.001925095 0.000440632 0.000490853 0.000403232 0.000288555 1.039900e-04
## 6 0.001704163 0.000450227 0.000548930 0.000403424 0.000301294 1.038710e-04
## GTCT TTCC TCGC GCGA GACG TTAC
## 1 0.001125680 0.001298820 1.218600e-04 0.000423290 0.000618106 0.000954172
## 2 0.001215737 0.001205823 9.069684e-05 0.000372340 0.000598504 0.000838743
## 3 0.001325181 0.001321673 9.800849e-05 0.000402036 0.000629187 0.000853758
## 4 0.001128699 0.001176067 1.063750e-04 0.000389690 0.000605333 0.000889465
## 5 0.001153988 0.001284245 1.001710e-04 0.000378005 0.000569993 0.000862367
## 6 0.001140750 0.001124222 1.083220e-04 0.000436552 0.000650522 0.000756080
## TCGA TACG TTCG GTCG ATCG
## 1 0.000247343 0.000384822 0.000173007 0.000321636 0.000144962
## 2 0.000222970 0.000362282 0.000154977 0.000305733 0.000181928
## 3 0.000223930 0.000377985 0.000162757 0.000329073 0.000164899
## 4 0.000250430 0.000398811 0.000147736 0.000304530 0.000148900
## 5 0.000212033 0.000381698 0.000146035 0.000299367 0.000130454
## 6 0.000244082 0.000374388 0.000132925 0.000321178 0.000142401
## [1] "Healthy" "CLD" "HCC"
## Sample_ID Group TF_Score DELFI_Score
## Length:475 Length:475 Min. :0.001012 Min. :0.09211
## Class :character Class :character 1st Qu.:0.001572 1st Qu.:0.19719
## Mode :character Mode :character Median :0.010420 Median :0.23311
## Mean :0.031400 Mean :0.25739
## 3rd Qu.:0.014970 3rd Qu.:0.28785
## Max. :0.568900 Max. :0.93001
## mtcfDNA_fraction P.1.10. P.1.20. P.1.30.
## Min. :0.000286 Min. :1.000e-06 Min. :4.490e-06 Min. :6.490e-06
## 1st Qu.:0.001092 1st Qu.:2.430e-06 1st Qu.:2.340e-05 1st Qu.:1.340e-05
## Median :0.002286 Median :2.960e-06 Median :2.800e-05 Median :4.120e-05
## Mean :0.003830 Mean :6.212e-06 Mean :4.899e-05 Mean :7.829e-05
## 3rd Qu.:0.004765 3rd Qu.:3.985e-06 3rd Qu.:3.345e-05 3rd Qu.:5.360e-05
## Max. :0.069923 Max. :2.121e-04 Max. :1.907e-03 Max. :3.988e-03
## P.1.40. P.1.50. P.1.60.
## Min. :1.780e-05 Min. :3.780e-05 Min. :0.0000703
## 1st Qu.:3.475e-05 1st Qu.:6.895e-05 1st Qu.:0.0001302
## Median :5.880e-05 Median :1.013e-04 Median :0.0004005
## Mean :1.126e-04 Mean :2.693e-04 Mean :0.0012657
## 3rd Qu.:7.550e-05 3rd Qu.:1.565e-04 3rd Qu.:0.0007458
## Max. :5.058e-03 Max. :6.737e-03 Max. :0.0207264
## P.1.70. P.1.80. P.1.90. P.1.100.
## Min. :0.0001194 Min. :0.0002007 Min. :0.0003532 Min. :0.000720
## 1st Qu.:0.0002159 1st Qu.:0.0003901 1st Qu.:0.0009353 1st Qu.:0.002497
## Median :0.0010549 Median :0.0023869 Median :0.0048540 Median :0.009615
## Mean :0.0027901 Mean :0.0050515 Mean :0.0084436 Mean :0.014177
## 3rd Qu.:0.0020041 3rd Qu.:0.0042368 3rd Qu.:0.0082918 3rd Qu.:0.015368
## Max. :0.0393278 Max. :0.0608565 Max. :0.0876294 Max. :0.125969
## P.10.20. P.20.30. P.20.40.
## Min. :2.040e-06 Min. :2.430e-06 Min. :1.360e-05
## 1st Qu.:4.580e-06 1st Qu.:4.715e-06 1st Qu.:3.630e-05
## Median :2.060e-05 Median :1.650e-05 Median :5.160e-05
## Mean :3.546e-05 Mean :3.662e-05 Mean :8.966e-05
## 3rd Qu.:2.640e-05 3rd Qu.:2.555e-05 3rd Qu.:8.240e-05
## Max. :1.892e-03 Max. :2.081e-03 Max. :3.151e-03
## P.30.40. P.30.60. P.40.50.
## Min. :6.170e-06 Min. :0.0000638 Min. :1.350e-05
## 1st Qu.:1.280e-05 1st Qu.:0.0001178 1st Qu.:3.165e-05
## Median :1.770e-05 Median :0.0003527 Median :4.090e-05
## Mean :3.428e-05 Mean :0.0011874 Mean :1.567e-04
## 3rd Qu.:2.545e-05 3rd Qu.:0.0006953 3rd Qu.:7.645e-05
## Max. :1.070e-03 Max. :0.0187529 Max. :2.704e-03
## P.40.60. P.40.80. P.50.60.
## Min. :0.0000949 Min. :0.0001828 Min. :3.250e-05
## 1st Qu.:0.0001945 1st Qu.:0.0003524 1st Qu.:5.745e-05
## Median :0.0003388 Median :0.0023045 Median :2.951e-04
## Mean :0.0011849 Mean :0.0049389 Mean :9.964e-04
## 3rd Qu.:0.0006764 3rd Qu.:0.0040765 3rd Qu.:5.905e-04
## Max. :0.0183577 Max. :0.0584878 Max. :1.565e-02
## P.50.100. P.60.70. P.60.80.
## Min. :0.0006822 Min. :0.0000490 Min. :0.0002108
## 1st Qu.:0.0024342 1st Qu.:0.0000901 1st Qu.:0.0003888
## Median :0.0094142 Median :0.0006482 Median :0.0019409
## Mean :0.0139074 Mean :0.0015244 Mean :0.0038227
## 3rd Qu.:0.0152070 3rd Qu.:0.0012002 3rd Qu.:0.0035351
## Max. :0.1208964 Max. :0.0186014 Max. :0.0401301
## P.60.90. P.60.120. P.70.80. P.70.140.
## Min. :0.0002829 Min. :0.004097 Min. :0.0000813 Min. :0.02530
## 1st Qu.:0.0008043 1st Qu.:0.015009 1st Qu.:0.0001684 1st Qu.:0.07192
## Median :0.0043589 Median :0.027026 Median :0.0012942 Median :0.09071
## Mean :0.0071779 Mean :0.033651 Mean :0.0022614 Mean :0.10069
## 3rd Qu.:0.0074344 3rd Qu.:0.039751 3rd Qu.:0.0022951 3rd Qu.:0.11967
## Max. :0.0669030 Max. :0.188646 Max. :0.0215287 Max. :0.30717
## P.80.90. P.80.100. P.80.120. P.80.160.
## Min. :0.0001525 Min. :0.0005754 Min. :0.003919 Min. :0.1438
## 1st Qu.:0.0005505 1st Qu.:0.0022011 1st Qu.:0.014685 1st Qu.:0.2862
## Median :0.0024191 Median :0.0071709 Median :0.024816 Median :0.3166
## Mean :0.0033920 Mean :0.0091426 Mean :0.029865 Mean :0.3283
## 3rd Qu.:0.0040296 3rd Qu.:0.0111257 3rd Qu.:0.036613 3rd Qu.:0.3641
## Max. :0.0267729 Max. :0.0651127 Max. :0.148516 Max. :0.6114
## P.90.100. P.90.120. P.90.180. P.100.110.
## Min. :0.0003668 Min. :0.003663 Min. :0.3597 Min. :0.001097
## 1st Qu.:0.0015975 1st Qu.:0.013905 1st Qu.:0.7415 1st Qu.:0.004072
## Median :0.0046647 Median :0.022505 Median :0.7744 Median :0.007180
## Mean :0.0057332 Mean :0.026473 Mean :0.7689 Mean :0.008503
## 3rd Qu.:0.0071214 3rd Qu.:0.033114 3rd Qu.:0.8127 3rd Qu.:0.010733
## Max. :0.0383398 Max. :0.121743 Max. :0.9139 Max. :0.042918
## P.100.120. P.100.150. P.100.200. P.110.120.
## Min. :0.002977 Min. :0.07425 Min. :0.5034 Min. :0.001956
## 1st Qu.:0.011932 1st Qu.:0.15224 1st Qu.:0.9049 1st Qu.:0.007829
## Median :0.017795 Median :0.17365 Median :0.9268 Median :0.010691
## Mean :0.020635 Mean :0.18216 Mean :0.9124 Mean :0.012237
## 3rd Qu.:0.026043 3rd Qu.:0.20532 3rd Qu.:0.9402 3rd Qu.:0.015053
## Max. :0.083403 Max. :0.42563 Max. :0.9663 Max. :0.043520
## P.120.130. P.120.140. P.120.150. P.120.160.
## Min. :0.004614 Min. :0.01966 Min. :0.06511 Min. :0.1285
## 1st Qu.:0.015847 1st Qu.:0.05407 1st Qu.:0.13757 1st Qu.:0.2669
## Median :0.019396 Median :0.06396 Median :0.15628 Median :0.2921
## Mean :0.020971 Mean :0.06781 Mean :0.16142 Mean :0.2984
## 3rd Qu.:0.024316 3rd Qu.:0.07678 3rd Qu.:0.17781 3rd Qu.:0.3233
## Max. :0.068162 Max. :0.19530 Max. :0.36252 Max. :0.5243
## P.120.180. P.130.140. P.140.150. P.140.160.
## Min. :0.3398 Min. :0.01666 Min. :0.03749 Min. :0.09206
## 1st Qu.:0.7173 1st Qu.:0.03895 1st Qu.:0.08252 1st Qu.:0.20563
## Median :0.7508 Median :0.04518 Median :0.09128 Median :0.22406
## Mean :0.7425 Mean :0.04760 Mean :0.09285 Mean :0.22676
## 3rd Qu.:0.7827 3rd Qu.:0.05264 3rd Qu.:0.10142 3rd Qu.:0.24816
## Max. :0.8747 Max. :0.12732 Max. :0.16722 Max. :0.36099
## P.140.210. P.150.160. P.150.180. P.150.200.
## Min. :0.4718 Min. :0.05457 Min. :0.2619 Min. :0.3927
## 1st Qu.:0.8230 1st Qu.:0.12734 1st Qu.:0.5623 1st Qu.:0.7001
## Median :0.8661 Median :0.13677 Median :0.5912 Median :0.7493
## Mean :0.8483 Mean :0.13700 Mean :0.5810 Mean :0.7303
## 3rd Qu.:0.8914 3rd Qu.:0.14790 3rd Qu.:0.6141 3rd Qu.:0.7737
## Max. :0.9346 Max. :0.20759 Max. :0.6862 Max. :0.8385
## P.160.170. P.160.180. P.160.200. P.160.240.
## Min. :0.09594 Min. :0.2016 Min. :0.3136 Min. :0.3387
## 1st Qu.:0.23303 1st Qu.:0.4205 1st Qu.:0.5627 1st Qu.:0.6013
## Median :0.24886 Median :0.4453 Median :0.6091 Median :0.6539
## Mean :0.24588 Mean :0.4375 Mean :0.5933 Mean :0.6419
## 3rd Qu.:0.26339 3rd Qu.:0.4652 3rd Qu.:0.6403 3rd Qu.:0.6923
## Max. :0.31414 Max. :0.5349 Max. :0.7191 Max. :0.8037
## P.170.180. P.180.190. P.180.200. P.180.210.
## Min. :0.09431 Min. :0.03215 Min. :0.04516 Min. :0.05195
## 1st Qu.:0.18568 1st Qu.:0.08723 1st Qu.:0.12473 1st Qu.:0.14382
## Median :0.20458 Median :0.10255 Median :0.15043 Median :0.17870
## Mean :0.19816 Mean :0.10027 Mean :0.14646 Mean :0.17436
## 3rd Qu.:0.21658 3rd Qu.:0.11545 3rd Qu.:0.16969 3rd Qu.:0.20525
## Max. :0.24567 Max. :0.15057 Max. :0.21596 Max. :0.28035
## P.180.240. P.180.270. P.190.200. P.200.210.
## Min. :0.05914 Min. :0.06244 Min. :0.01301 Min. :0.006796
## 1st Qu.:0.16157 1st Qu.:0.16795 1st Qu.:0.03841 1st Qu.:0.018590
## Median :0.20085 Median :0.20772 Median :0.04940 Median :0.024842
## Mean :0.19785 Mean :0.20494 Mean :0.04895 Mean :0.025141
## 3rd Qu.:0.23250 3rd Qu.:0.23993 3rd Qu.:0.05888 3rd Qu.:0.030166
## Max. :0.33012 Max. :0.35282 Max. :0.08716 Max. :0.062060
## P.200.220. P.200.240. P.200.250. P.200.300.
## Min. :0.01049 Min. :0.01398 Min. :0.01520 Min. :0.02070
## 1st Qu.:0.02832 1st Qu.:0.03666 1st Qu.:0.03926 1st Qu.:0.04800
## Median :0.03666 Median :0.04693 Median :0.04937 Median :0.06059
## Mean :0.03732 Mean :0.04863 Mean :0.05134 Mean :0.06293
## 3rd Qu.:0.04409 3rd Qu.:0.05775 3rd Qu.:0.06025 3rd Qu.:0.07285
## Max. :0.10214 Max. :0.14022 Max. :0.14922 Max. :0.18598
## P.210.220. P.210.240. P.210.280. P.220.230.
## Min. :0.003690 Min. :0.007185 Min. :0.01214 Min. :0.002069
## 1st Qu.:0.009567 1st Qu.:0.017700 1st Qu.:0.02518 1st Qu.:0.005037
## Median :0.012334 Median :0.022283 Median :0.03089 Median :0.006302
## Mean :0.012914 Mean :0.023487 Mean :0.03280 Mean :0.006720
## 3rd Qu.:0.015353 3rd Qu.:0.027239 3rd Qu.:0.03723 3rd Qu.:0.007778
## Max. :0.040079 Max. :0.078164 Max. :0.10793 Max. :0.024022
## P.220.240. P.230.240. P.240.250. P.240.260.
## Min. :0.003495 Min. :0.001380 Min. :0.0009052 Min. :0.001522
## 1st Qu.:0.007821 1st Qu.:0.002887 1st Qu.:0.0019966 1st Qu.:0.003383
## Median :0.009750 Median :0.003526 Median :0.0024283 Median :0.004361
## Mean :0.010393 Mean :0.003853 Mean :0.0027116 Mean :0.004906
## 3rd Qu.:0.011775 3rd Qu.:0.004363 3rd Qu.:0.0030301 3rd Qu.:0.005590
## Max. :0.038085 Max. :0.014063 Max. :0.0135096 Max. :0.026305
## P.240.270. P.240.280. P.240.300. P.240.320.
## Min. :0.002140 Min. :0.002484 Min. :0.002484 Min. :0.002484
## 1st Qu.:0.004768 1st Qu.:0.006119 1st Qu.:0.007329 1st Qu.:0.007329
## Median :0.006241 Median :0.008057 Median :0.011649 Median :0.012103
## Mean :0.007090 Mean :0.009316 Mean :0.014298 Mean :0.018305
## 3rd Qu.:0.008209 3rd Qu.:0.010961 3rd Qu.:0.018539 3rd Qu.:0.023329
## Max. :0.038724 Max. :0.051314 Max. :0.078829 Max. :0.107244
## P.250.260. P.250.300. P.260.270. P.260.280.
## Min. :0.0006169 Min. :0.001425 Min. :0.0005529 Min. :0.000667
## 1st Qu.:0.0014817 1st Qu.:0.005416 1st Qu.:0.0012646 1st Qu.:0.002464
## Median :0.0019652 Median :0.009189 Median :0.0017676 Median :0.003624
## Mean :0.0022549 Mean :0.011587 Mean :0.0021237 Mean :0.004336
## 3rd Qu.:0.0026474 3rd Qu.:0.015818 3rd Qu.:0.0026121 3rd Qu.:0.005340
## Max. :0.0127957 Max. :0.065320 Max. :0.0124183 Max. :0.025008
## P.270.280. P.270.300. P.270.360. P.280.290.
## Min. :0.000000 Min. :0.000000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.001257 1st Qu.:0.002652 1st Qu.:0.002652 1st Qu.:0.001386
## Median :0.001933 Median :0.005593 Median :0.005905 Median :0.002208
## Mean :0.002226 Mean :0.007208 Mean :0.014879 Mean :0.002474
## 3rd Qu.:0.002719 3rd Qu.:0.010774 3rd Qu.:0.015829 3rd Qu.:0.003402
## Max. :0.012590 Max. :0.040106 Max. :0.146189 Max. :0.013262
## P.280.300. P.280.320. P.280.350. P.290.300.
## Min. :0.000000 Min. :0.000000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.003208 1st Qu.:0.001432 1st Qu.:0.001432 1st Qu.:0.000000
## Median :0.004303 Median :0.004073 Median :0.004073 Median :0.001986
## Mean :0.005643 Mean :0.008989 Mean :0.011968 Mean :0.002508
## 3rd Qu.:0.008241 3rd Qu.:0.013398 3rd Qu.:0.013398 3rd Qu.:0.004671
## Max. :0.027515 Max. :0.061544 Max. :0.114419 Max. :0.014253
## P.300.310. P.300.320. P.300.330. P.300.350.
## Min. :0.000000 Min. :0.000000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.000000 Median :0.006900 Median :0.000000 Median :0.000000
## Mean :0.002271 Mean :0.006576 Mean :0.005311 Mean :0.006985
## 3rd Qu.:0.004935 3rd Qu.:0.009404 3rd Qu.:0.005342 3rd Qu.:0.005342
## Max. :0.018048 Max. :0.039102 Max. :0.059337 Max. :0.094000
## P.300.360. P.300.400. P.310.320. P.320.330.
## Min. :0.000000 Min. :0.000000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.000000 Median :0.000000 Median :0.000000 Median :0.000000
## Mean :0.007671 Mean :0.009427 Mean :0.001735 Mean :0.001304
## 3rd Qu.:0.005342 3rd Qu.:0.005342 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :0.123057 Max. :0.214529 Max. :0.021054 Max. :0.020461
## P.320.340. P.320.360. P.320.400. AACC
## Min. :0.000000 Min. :0.000000 Min. :0.000000 Min. :0.002290
## 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.002466
## Median :0.000000 Median :0.000000 Median :0.000000 Median :0.002516
## Mean :0.005737 Mean :0.003664 Mean :0.005421 Mean :0.002728
## 3rd Qu.:0.010506 3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.:0.003160
## Max. :0.042049 Max. :0.098087 Max. :0.189560 Max. :0.004468
## ACCC CCCT CTAA TAAC
## Min. :0.002532 Min. :0.005934 Min. :0.002772 Min. :0.001588
## 1st Qu.:0.002992 1st Qu.:0.009717 1st Qu.:0.003328 1st Qu.:0.002476
## Median :0.003203 Median :0.010378 Median :0.004983 Median :0.002921
## Mean :0.003358 Mean :0.010539 Mean :0.004616 Mean :0.002820
## 3rd Qu.:0.003789 3rd Qu.:0.011449 3rd Qu.:0.005403 3rd Qu.:0.003124
## Max. :0.004946 Max. :0.013820 Max. :0.014935 Max. :0.003546
## CCCC CCTA AGCC ATCC
## Min. :0.004245 Min. :0.004579 Min. :0.003008 Min. :0.0005892
## 1st Qu.:0.006861 1st Qu.:0.006195 1st Qu.:0.003269 1st Qu.:0.0006949
## Median :0.007263 Median :0.006522 Median :0.003465 Median :0.0022321
## Mean :0.007478 Mean :0.006808 Mean :0.003545 Mean :0.0017716
## 3rd Qu.:0.007990 3rd Qu.:0.007738 3rd Qu.:0.003803 3rd Qu.:0.0022786
## Max. :0.011879 Max. :0.009345 Max. :0.004958 Max. :0.0033270
## CCTC AAAC TACC CCCA
## Min. :0.007092 Min. :0.003158 Min. :0.001492 Min. :0.005945
## 1st Qu.:0.008446 1st Qu.:0.004403 1st Qu.:0.002253 1st Qu.:0.014634
## Median :0.008979 Median :0.004536 Median :0.002412 Median :0.015347
## Mean :0.009261 Mean :0.004756 Mean :0.002365 Mean :0.015452
## 3rd Qu.:0.010152 3rd Qu.:0.005478 3rd Qu.:0.002478 3rd Qu.:0.016171
## Max. :0.012475 Max. :0.006611 Max. :0.002906 Max. :0.022195
## GGCG TCCC CCTG TGTA
## Min. :0.0007021 Min. :0.001072 Min. :0.009325 Min. :0.003602
## 1st Qu.:0.0008685 1st Qu.:0.002681 1st Qu.:0.011570 1st Qu.:0.005315
## Median :0.0009227 Median :0.003710 Median :0.012336 Median :0.005568
## Mean :0.0009743 Mean :0.003490 Mean :0.012919 Mean :0.005522
## 3rd Qu.:0.0010016 3rd Qu.:0.003874 3rd Qu.:0.014414 3rd Qu.:0.005744
## Max. :0.0026048 Max. :0.005178 Max. :0.017603 Max. :0.007028
## GCCA CACC TGTG CTGC
## Min. :0.003932 Min. :0.003512 Min. :0.003809 Min. :0.001672
## 1st Qu.:0.004816 1st Qu.:0.004505 1st Qu.:0.006595 1st Qu.:0.002210
## Median :0.005172 Median :0.004756 Median :0.006724 Median :0.004626
## Mean :0.005559 Mean :0.004845 Mean :0.006737 Mean :0.004060
## 3rd Qu.:0.006461 3rd Qu.:0.005152 3rd Qu.:0.006878 3rd Qu.:0.004949
## Max. :0.008169 Max. :0.006986 Max. :0.007907 Max. :0.007656
## GAGC GCAC CACT GAGG
## Min. :0.001950 Min. :0.002038 Min. :0.004403 Min. :0.002952
## 1st Qu.:0.002150 1st Qu.:0.002441 1st Qu.:0.005687 1st Qu.:0.003272
## Median :0.002212 Median :0.002591 Median :0.005782 Median :0.003511
## Mean :0.002257 Mean :0.002833 Mean :0.006100 Mean :0.003714
## 3rd Qu.:0.002319 3rd Qu.:0.003386 3rd Qu.:0.006794 3rd Qu.:0.004210
## Max. :0.003079 Max. :0.004007 Max. :0.007436 Max. :0.005047
## AGTG GGCC CCAG ACAG
## Min. :0.004068 Min. :0.002570 Min. :0.00611 Min. :0.004382
## 1st Qu.:0.004353 1st Qu.:0.003117 1st Qu.:0.01176 1st Qu.:0.004984
## Median :0.004451 Median :0.003387 Median :0.01233 Median :0.005101
## Mean :0.004595 Mean :0.003584 Mean :0.01250 Mean :0.005426
## 3rd Qu.:0.004830 3rd Qu.:0.004038 3rd Qu.:0.01319 3rd Qu.:0.006181
## Max. :0.006224 Max. :0.005604 Max. :0.01758 Max. :0.007346
## AGAG CTAG ACGG CATC
## Min. :0.005228 Min. :0.001515 Min. :0.0003943 Min. :0.004301
## 1st Qu.:0.005612 1st Qu.:0.002145 1st Qu.:0.0004651 1st Qu.:0.004725
## Median :0.005822 Median :0.003485 Median :0.0005022 Median :0.004811
## Mean :0.006010 Mean :0.003209 Mean :0.0005161 Mean :0.004820
## 3rd Qu.:0.006487 3rd Qu.:0.003798 3rd Qu.:0.0005536 3rd Qu.:0.004908
## Max. :0.008505 Max. :0.009402 Max. :0.0009250 Max. :0.005986
## AGTC TCCA GTCA CCTT
## Min. :0.002357 Min. :0.001764 Min. :0.002164 Min. :0.007326
## 1st Qu.:0.002655 1st Qu.:0.003534 1st Qu.:0.002447 1st Qu.:0.009408
## Median :0.002692 Median :0.004896 Median :0.002481 Median :0.009793
## Mean :0.002849 Mean :0.004499 Mean :0.002584 Mean :0.010578
## 3rd Qu.:0.003167 3rd Qu.:0.005050 3rd Qu.:0.002773 3rd Qu.:0.012512
## Max. :0.004020 Max. :0.005698 Max. :0.003502 Max. :0.015071
## CCAA GCCT GGGC CGCT
## Min. :0.004922 Min. :0.003850 Min. :0.002087 Min. :0.0004568
## 1st Qu.:0.009861 1st Qu.:0.004834 1st Qu.:0.002422 1st Qu.:0.0008891
## Median :0.010216 Median :0.005223 Median :0.002594 Median :0.0009504
## Mean :0.010356 Mean :0.005822 Mean :0.002721 Mean :0.0009688
## 3rd Qu.:0.011045 3rd Qu.:0.007174 3rd Qu.:0.002969 3rd Qu.:0.0010167
## Max. :0.012734 Max. :0.008381 Max. :0.004459 Max. :0.0018322
## CTGG GCCC GTGC CATT
## Min. :0.001780 Min. :0.002334 Min. :0.001831 Min. :0.006098
## 1st Qu.:0.002786 1st Qu.:0.002926 1st Qu.:0.001984 1st Qu.:0.008153
## Median :0.005823 Median :0.003211 Median :0.002067 Median :0.008397
## Mean :0.005188 Mean :0.003519 Mean :0.002141 Mean :0.009032
## 3rd Qu.:0.006414 3rd Qu.:0.004174 3rd Qu.:0.002296 3rd Qu.:0.010694
## Max. :0.010821 Max. :0.005435 Max. :0.002820 Max. :0.011593
## TCTG AGGG TAGA GGTG
## Min. :0.001076 Min. :0.002883 Min. :0.002359 Min. :0.003833
## 1st Qu.:0.003968 1st Qu.:0.003194 1st Qu.:0.003225 1st Qu.:0.004516
## Median :0.004668 Median :0.003478 Median :0.003781 Median :0.004719
## Mean :0.004454 Mean :0.003494 Mean :0.003623 Mean :0.004890
## 3rd Qu.:0.004796 3rd Qu.:0.003701 3rd Qu.:0.003928 3rd Qu.:0.005255
## Max. :0.005653 Max. :0.005293 Max. :0.004527 Max. :0.007730
## AACG TGAG AAGA CAGA
## Min. :0.0005399 Min. :0.003878 Min. :0.002888 Min. :0.003683
## 1st Qu.:0.0005845 1st Qu.:0.006436 1st Qu.:0.004730 1st Qu.:0.007163
## Median :0.0006092 Median :0.006828 Median :0.004835 Median :0.007437
## Mean :0.0006565 Mean :0.006768 Mean :0.005229 Mean :0.007548
## 3rd Qu.:0.0007627 3rd Qu.:0.007068 3rd Qu.:0.006299 3rd Qu.:0.007980
## Max. :0.0009039 Max. :0.008380 Max. :0.007090 Max. :0.008875
## AAGG GACA GCGC CGTT
## Min. :0.002632 Min. :0.003091 Min. :0.0002795 Min. :0.001348
## 1st Qu.:0.003379 1st Qu.:0.003515 1st Qu.:0.0003486 1st Qu.:0.001746
## Median :0.003475 Median :0.003615 Median :0.0003723 Median :0.001811
## Mean :0.003756 Mean :0.003869 Mean :0.0003900 Mean :0.002017
## 3rd Qu.:0.004366 3rd Qu.:0.004379 3rd Qu.:0.0004031 3rd Qu.:0.002455
## Max. :0.006034 Max. :0.005882 Max. :0.0011260 Max. :0.002908
## CAGC CAGT AATA GGAG
## Min. :0.003023 Min. :0.003351 Min. :0.002025 Min. :0.005213
## 1st Qu.:0.004625 1st Qu.:0.004508 1st Qu.:0.004589 1st Qu.:0.005826
## Median :0.004937 Median :0.004624 Median :0.004913 Median :0.006128
## Mean :0.005044 Mean :0.004876 Mean :0.005062 Mean :0.006370
## 3rd Qu.:0.005397 3rd Qu.:0.005484 3rd Qu.:0.006057 3rd Qu.:0.006916
## Max. :0.007197 Max. :0.005966 Max. :0.007106 Max. :0.009093
## CGAA GTGA CCAC GCTG
## Min. :0.0008035 Min. :0.003051 Min. :0.004622 Min. :0.003630
## 1st Qu.:0.0014848 1st Qu.:0.003193 1st Qu.:0.007805 1st Qu.:0.004323
## Median :0.0015785 Median :0.003241 Median :0.008178 Median :0.004685
## Mean :0.0016074 Mean :0.003411 Mean :0.008249 Mean :0.005418
## 3rd Qu.:0.0017051 3rd Qu.:0.003709 3rd Qu.:0.008720 3rd Qu.:0.007022
## Max. :0.0031049 Max. :0.004573 Max. :0.010842 Max. :0.008234
## TGCC AAGC AGAA AAAA
## Min. :0.001791 Min. :0.002136 Min. :0.005071 Min. :0.006633
## 1st Qu.:0.003193 1st Qu.:0.002549 1st Qu.:0.006958 1st Qu.:0.010990
## Median :0.003870 Median :0.002591 Median :0.007215 Median :0.011409
## Mean :0.003750 Mean :0.002802 Mean :0.007598 Mean :0.012414
## 3rd Qu.:0.004021 3rd Qu.:0.003272 3rd Qu.:0.008727 3rd Qu.:0.015507
## Max. :0.005624 Max. :0.003631 Max. :0.010270 Max. :0.021323
## GAAG GCTC TGGG GGCA
## Min. :0.003386 Min. :0.002168 Min. :0.003154 Min. :0.003385
## 1st Qu.:0.004457 1st Qu.:0.002572 1st Qu.:0.004385 1st Qu.:0.004372
## Median :0.004561 Median :0.002770 Median :0.004967 Median :0.004565
## Mean :0.004899 Mean :0.003040 Mean :0.004954 Mean :0.004617
## 3rd Qu.:0.005553 3rd Qu.:0.003604 3rd Qu.:0.005294 3rd Qu.:0.004765
## Max. :0.006829 Max. :0.004348 Max. :0.007953 Max. :0.007440
## AATG CCGG GGAT AACA
## Min. :0.003621 Min. :0.0003995 Min. :0.003631 Min. :0.003239
## 1st Qu.:0.004949 1st Qu.:0.0007632 1st Qu.:0.003920 1st Qu.:0.004833
## Median :0.005122 Median :0.0008379 Median :0.003993 Median :0.004979
## Mean :0.005507 Mean :0.0008962 Mean :0.004160 Mean :0.005278
## 3rd Qu.:0.006457 3rd Qu.:0.0009317 3rd Qu.:0.004391 3rd Qu.:0.006200
## Max. :0.007580 Max. :0.0024997 Max. :0.006385 Max. :0.007025
## CATG CCAT GGGT CTGA
## Min. :0.005508 Min. :0.008862 Min. :0.002200 Min. :0.002299
## 1st Qu.:0.006324 1st Qu.:0.010265 1st Qu.:0.002406 1st Qu.:0.003744
## Median :0.006422 Median :0.010669 Median :0.002553 Median :0.006206
## Mean :0.006771 Mean :0.010797 Mean :0.002709 Mean :0.005571
## 3rd Qu.:0.007569 3rd Qu.:0.011445 3rd Qu.:0.003011 3rd Qu.:0.006574
## Max. :0.007959 Max. :0.012870 Max. :0.004523 Max. :0.008925
## CAGG TATT GAGA GAAC
## Min. :0.004196 Min. :0.003360 Min. :0.003549 Min. :0.002257
## 1st Qu.:0.006668 1st Qu.:0.006199 1st Qu.:0.003908 1st Qu.:0.002693
## Median :0.007057 Median :0.006716 Median :0.004017 Median :0.002786
## Mean :0.007166 Mean :0.006831 Mean :0.004347 Mean :0.002755
## 3rd Qu.:0.007428 3rd Qu.:0.007624 3rd Qu.:0.005035 3rd Qu.:0.002839
## Max. :0.011134 Max. :0.008731 Max. :0.006200 Max. :0.003410
## GGCT GGGA AGAC GCAG
## Min. :0.003737 Min. :0.003888 Min. :0.003176 Min. :0.003249
## 1st Qu.:0.004398 1st Qu.:0.004238 1st Qu.:0.003327 1st Qu.:0.003877
## Median :0.004713 Median :0.004399 Median :0.003399 Median :0.004134
## Mean :0.004956 Mean :0.004571 Mean :0.003549 Mean :0.004701
## 3rd Qu.:0.005527 3rd Qu.:0.004865 3rd Qu.:0.003844 3rd Qu.:0.005919
## Max. :0.007272 Max. :0.006563 Max. :0.004904 Max. :0.006796
## CCGC CGAG TTTC ATTC
## Min. :0.0003767 Min. :0.0007919 Min. :0.0006224 Min. :0.003152
## 1st Qu.:0.0006862 1st Qu.:0.0015214 1st Qu.:0.0020804 1st Qu.:0.003874
## Median :0.0007320 Median :0.0015763 Median :0.0045618 Median :0.004002
## Mean :0.0007805 Mean :0.0016194 Mean :0.0038022 Mean :0.004194
## 3rd Qu.:0.0007902 3rd Qu.:0.0016660 3rd Qu.:0.0048003 3rd Qu.:0.004678
## Max. :0.0022275 Max. :0.0033590 Max. :0.0052895 Max. :0.006564
## TGCA AAAT ACAA GAAA
## Min. :0.002892 Min. :0.003869 Min. :0.003378 Min. :0.004225
## 1st Qu.:0.004915 1st Qu.:0.007370 1st Qu.:0.004948 1st Qu.:0.006664
## Median :0.005466 Median :0.007763 Median :0.005190 Median :0.006839
## Mean :0.005324 Mean :0.008035 Mean :0.005476 Mean :0.007570
## 3rd Qu.:0.005615 3rd Qu.:0.009525 3rd Qu.:0.006471 3rd Qu.:0.009355
## Max. :0.006753 Max. :0.011041 Max. :0.007702 Max. :0.011033
## AGCA CAAA GCTA CGCC
## Min. :0.004101 Min. :0.005595 Min. :0.002250 Min. :0.0005664
## 1st Qu.:0.004374 1st Qu.:0.010473 1st Qu.:0.002690 1st Qu.:0.0011759
## Median :0.004437 Median :0.010901 Median :0.002820 Median :0.0012245
## Mean :0.004615 Mean :0.011142 Mean :0.003219 Mean :0.0012841
## 3rd Qu.:0.004898 3rd Qu.:0.012408 3rd Qu.:0.004123 3rd Qu.:0.0013162
## Max. :0.005900 Max. :0.013543 Max. :0.005170 Max. :0.0034118
## ACCA CCCG TCTC CAAC
## Min. :0.004005 Min. :0.0009275 Min. :0.001246 Min. :0.002704
## 1st Qu.:0.004539 1st Qu.:0.0022363 1st Qu.:0.004226 1st Qu.:0.004042
## Median :0.004683 Median :0.0023451 Median :0.005046 Median :0.004128
## Mean :0.004852 Mean :0.0024635 Mean :0.004803 Mean :0.004230
## 3rd Qu.:0.005230 3rd Qu.:0.0025097 3rd Qu.:0.005203 3rd Qu.:0.004548
## Max. :0.006669 Max. :0.0060046 Max. :0.005823 Max. :0.004841
## ACTA GCGG TGCT GAGT
## Min. :0.002007 Min. :0.0003187 Min. :0.002898 Min. :0.002132
## 1st Qu.:0.002904 1st Qu.:0.0004096 1st Qu.:0.004372 1st Qu.:0.002348
## Median :0.003072 Median :0.0004385 Median :0.004914 Median :0.002412
## Mean :0.003205 Mean :0.0004605 Mean :0.004760 Mean :0.002661
## 3rd Qu.:0.003672 3rd Qu.:0.0004813 3rd Qu.:0.005019 3rd Qu.:0.003172
## Max. :0.004638 Max. :0.0012246 Max. :0.006109 Max. :0.003835
## CGTG AGGC TAGC CTCC
## Min. :0.001164 Min. :0.002964 Min. :0.001383 Min. :0.001731
## 1st Qu.:0.001728 1st Qu.:0.003300 1st Qu.:0.001711 1st Qu.:0.002026
## Median :0.001836 Median :0.003512 Median :0.002002 Median :0.004517
## Mean :0.001887 Mean :0.003546 Mean :0.001918 Mean :0.003910
## 3rd Qu.:0.002028 3rd Qu.:0.003666 3rd Qu.:0.002052 3rd Qu.:0.004752
## Max. :0.003025 Max. :0.005682 Max. :0.002423 Max. :0.007973
## TCCT ACTG CAAG AAGT
## Min. :0.001614 Min. :0.003530 Min. :0.004549 Min. :0.002092
## 1st Qu.:0.003718 1st Qu.:0.004050 1st Qu.:0.006985 1st Qu.:0.003049
## Median :0.004812 Median :0.004202 Median :0.007281 Median :0.003111
## Mean :0.004484 Mean :0.004554 Mean :0.007327 Mean :0.003352
## 3rd Qu.:0.004959 3rd Qu.:0.005375 3rd Qu.:0.007702 3rd Qu.:0.004012
## Max. :0.005655 Max. :0.006635 Max. :0.008624 Max. :0.004463
## CGCA TAAT GTTT TGAT
## Min. :0.0003577 Min. :0.003165 Min. :0.003451 Min. :0.004650
## 1st Qu.:0.0008449 1st Qu.:0.004815 1st Qu.:0.004931 1st Qu.:0.005176
## Median :0.0009012 Median :0.005578 Median :0.005167 Median :0.006095
## Mean :0.0009236 Mean :0.005565 Mean :0.005622 Mean :0.005841
## 3rd Qu.:0.0009897 3rd Qu.:0.006312 3rd Qu.:0.006808 3rd Qu.:0.006328
## Max. :0.0015208 Max. :0.007239 Max. :0.008885 Max. :0.007156
## CTAT TAGG TGTT AGAT
## Min. :0.002168 Min. :0.001507 Min. :0.004916 Min. :0.002883
## 1st Qu.:0.002605 1st Qu.:0.002303 1st Qu.:0.006774 1st Qu.:0.004128
## Median :0.004055 Median :0.002455 Median :0.007129 Median :0.004246
## Mean :0.003742 Mean :0.002428 Mean :0.007036 Mean :0.004399
## 3rd Qu.:0.004444 3rd Qu.:0.002535 3rd Qu.:0.007311 3rd Qu.:0.004909
## Max. :0.013611 Max. :0.003059 Max. :0.009400 Max. :0.007347
## GGTA GTTG GTGG CTCA
## Min. :0.001911 Min. :0.002383 Min. :0.002501 Min. :0.002534
## 1st Qu.:0.002818 1st Qu.:0.002659 1st Qu.:0.002757 1st Qu.:0.003036
## Median :0.002910 Median :0.002709 Median :0.002928 Median :0.005875
## Mean :0.002960 Mean :0.002917 Mean :0.003043 Mean :0.005158
## 3rd Qu.:0.003024 3rd Qu.:0.003340 3rd Qu.:0.003326 3rd Qu.:0.006223
## Max. :0.005146 Max. :0.004011 Max. :0.004162 Max. :0.010311
## CTCT GCAT GCAA GTTA
## Min. :0.002565 Min. :0.002832 Min. :0.002878 Min. :0.001224
## 1st Qu.:0.002978 1st Qu.:0.003418 1st Qu.:0.003455 1st Qu.:0.002402
## Median :0.005690 Median :0.003546 Median :0.003589 Median :0.002536
## Mean :0.004969 Mean :0.004001 Mean :0.004142 Mean :0.002631
## 3rd Qu.:0.005950 3rd Qu.:0.005075 3rd Qu.:0.005446 3rd Qu.:0.003018
## Max. :0.011078 Max. :0.006111 Max. :0.006242 Max. :0.004413
## ACTC CTTT GATG ATGG
## Min. :0.002940 Min. :0.004677 Min. :0.003103 Min. :0.003294
## 1st Qu.:0.003222 1st Qu.:0.006509 1st Qu.:0.003491 1st Qu.:0.003711
## Median :0.003376 Median :0.009196 Median :0.003563 Median :0.003911
## Mean :0.003684 Mean :0.008464 Mean :0.003578 Mean :0.003923
## 3rd Qu.:0.004406 3rd Qu.:0.009652 3rd Qu.:0.003660 3rd Qu.:0.004113
## Max. :0.005409 Max. :0.015564 Max. :0.004669 Max. :0.005528
## ACAT CTTA ACCT ACAC
## Min. :0.003048 Min. :0.002539 Min. :0.002553 Min. :0.003159
## 1st Qu.:0.004754 1st Qu.:0.003157 1st Qu.:0.003288 1st Qu.:0.003553
## Median :0.005031 Median :0.004586 Median :0.004017 Median :0.003655
## Mean :0.005274 Mean :0.004192 Mean :0.003827 Mean :0.003845
## 3rd Qu.:0.006177 3rd Qu.:0.004916 3rd Qu.:0.004217 3rd Qu.:0.004296
## Max. :0.008669 Max. :0.007894 Max. :0.005017 Max. :0.005082
## TCTA ATAA ATTA GCTT
## Min. :0.001057 Min. :0.002379 Min. :0.002035 Min. :0.002709
## 1st Qu.:0.003055 1st Qu.:0.004800 1st Qu.:0.003722 1st Qu.:0.003454
## Median :0.003628 Median :0.005111 Median :0.003930 Median :0.003647
## Mean :0.003457 Mean :0.005064 Mean :0.003962 Mean :0.004428
## 3rd Qu.:0.003826 3rd Qu.:0.005713 3rd Qu.:0.004550 3rd Qu.:0.006183
## Max. :0.004201 Max. :0.009312 Max. :0.006654 Max. :0.007491
## TCTT GGAA GGAC TTTT
## Min. :0.001912 Min. :0.006592 Min. :0.002369 Min. :0.002078
## 1st Qu.:0.005118 1st Qu.:0.007076 1st Qu.:0.002598 1st Qu.:0.004387
## Median :0.005911 Median :0.007323 Median :0.002684 Median :0.009058
## Mean :0.005651 Mean :0.007740 Mean :0.002757 Mean :0.007744
## 3rd Qu.:0.006120 3rd Qu.:0.008566 3rd Qu.:0.002922 3rd Qu.:0.009894
## Max. :0.006985 Max. :0.010799 Max. :0.003794 Max. :0.010851
## ATTG TCAT TGAA AGGA
## Min. :0.001789 Min. :0.001998 Min. :0.004616 Min. :0.004087
## 1st Qu.:0.002834 1st Qu.:0.003698 1st Qu.:0.006866 1st Qu.:0.004369
## Median :0.002916 Median :0.004974 Median :0.007839 Median :0.004569
## Mean :0.003034 Mean :0.004556 Mean :0.007621 Mean :0.004736
## 3rd Qu.:0.003477 3rd Qu.:0.005158 3rd Qu.:0.008253 3rd Qu.:0.005170
## Max. :0.004327 Max. :0.005431 Max. :0.009684 Max. :0.005918
## ATAT ATGA TTAT GGGG
## Min. :0.001264 Min. :0.002323 Min. :0.001131 Min. :0.002588
## 1st Qu.:0.001622 1st Qu.:0.003648 1st Qu.:0.001872 1st Qu.:0.002958
## Median :0.004074 Median :0.003749 Median :0.004123 Median :0.003197
## Mean :0.003464 Mean :0.003873 Mean :0.003520 Mean :0.003394
## 3rd Qu.:0.004671 3rd Qu.:0.004360 3rd Qu.:0.004645 3rd Qu.:0.003712
## Max. :0.011717 Max. :0.004906 Max. :0.005929 Max. :0.007454
## GAAT ACTT CAAT AAAG
## Min. :0.005301 Min. :0.002992 Min. :0.004277 Min. :0.004128
## 1st Qu.:0.006139 1st Qu.:0.004218 1st Qu.:0.006040 1st Qu.:0.006284
## Median :0.006481 Median :0.004422 Median :0.006245 Median :0.006442
## Mean :0.006788 Mean :0.004869 Mean :0.006332 Mean :0.006815
## 3rd Qu.:0.007480 3rd Qu.:0.005899 3rd Qu.:0.006976 3rd Qu.:0.007933
## Max. :0.009877 Max. :0.007253 Max. :0.007446 Max. :0.009388
## TTTA TAAA ATGT TGGC
## Min. :0.0009052 Min. :0.004142 Min. :0.0007112 Min. :0.002076
## 1st Qu.:0.0025056 1st Qu.:0.006997 1st Qu.:0.0008906 1st Qu.:0.002809
## Median :0.0049148 Median :0.007659 Median :0.0034715 Median :0.003368
## Mean :0.0043734 Mean :0.007749 Mean :0.0027029 Mean :0.003279
## 3rd Qu.:0.0056841 3rd Qu.:0.008641 3rd Qu.:0.0036681 3rd Qu.:0.003512
## Max. :0.0062992 Max. :0.010517 Max. :0.0051020 Max. :0.005112
## TGAC CGGT GTAA CGGA
## Min. :0.002165 Min. :0.0003545 Min. :0.002074 Min. :0.0004008
## 1st Qu.:0.003000 1st Qu.:0.0006181 1st Qu.:0.002859 1st Qu.:0.0008425
## Median :0.003558 Median :0.0006548 Median :0.002941 Median :0.0008720
## Mean :0.003378 Mean :0.0006744 Mean :0.003057 Mean :0.0008980
## 3rd Qu.:0.003615 3rd Qu.:0.0007208 3rd Qu.:0.003469 3rd Qu.:0.0009194
## Max. :0.004034 Max. :0.0010933 Max. :0.004325 Max. :0.0016371
## AACT CTTG AGGT TATG
## Min. :0.001517 Min. :0.002114 Min. :0.002636 Min. :0.001937
## 1st Qu.:0.001812 1st Qu.:0.002934 1st Qu.:0.002813 1st Qu.:0.003786
## Median :0.003355 Median :0.005025 Median :0.002919 Median :0.003988
## Mean :0.002881 Mean :0.004479 Mean :0.003059 Mean :0.003932
## 3rd Qu.:0.003500 3rd Qu.:0.005254 3rd Qu.:0.003388 3rd Qu.:0.004183
## Max. :0.003924 Max. :0.008019 Max. :0.003840 Max. :0.004561
## AGTT GATT GGTT TACA
## Min. :0.002618 Min. :0.002547 Min. :0.003094 Min. :0.002774
## 1st Qu.:0.003827 1st Qu.:0.003368 1st Qu.:0.003640 1st Qu.:0.004547
## Median :0.003951 Median :0.003478 Median :0.003756 Median :0.004832
## Mean :0.004229 Mean :0.003581 Mean :0.004183 Mean :0.004771
## 3rd Qu.:0.004960 3rd Qu.:0.003862 3rd Qu.:0.005052 3rd Qu.:0.005040
## Max. :0.006067 Max. :0.005001 Max. :0.006589 Max. :0.005829
## GACC AATC TATA CACA
## Min. :0.001657 Min. :0.002331 Min. :0.002174 Min. :0.004728
## 1st Qu.:0.001936 1st Qu.:0.002906 1st Qu.:0.004887 1st Qu.:0.007731
## Median :0.002046 Median :0.002993 Median :0.005302 Median :0.007874
## Mean :0.002076 Mean :0.002974 Mean :0.005240 Mean :0.008236
## 3rd Qu.:0.002217 3rd Qu.:0.003052 3rd Qu.:0.005764 3rd Qu.:0.009067
## Max. :0.002617 Max. :0.004196 Max. :0.006743 Max. :0.009935
## TATC TCAC AATT GTGT
## Min. :0.001737 Min. :0.0009431 Min. :0.002470 Min. :0.002974
## 1st Qu.:0.002942 1st Qu.:0.0025234 1st Qu.:0.004656 1st Qu.:0.003128
## Median :0.003284 Median :0.0035309 Median :0.004921 Median :0.003184
## Mean :0.003213 Mean :0.0032265 Mean :0.005255 Mean :0.003331
## 3rd Qu.:0.003508 3rd Qu.:0.0036285 3rd Qu.:0.006487 3rd Qu.:0.003612
## Max. :0.003915 Max. :0.0041136 Max. :0.007856 Max. :0.004384
## AGTA CCGT TAAG TTAA
## Min. :0.002214 Min. :0.0004765 Min. :0.001705 Min. :0.001328
## 1st Qu.:0.003185 1st Qu.:0.0007311 1st Qu.:0.003386 1st Qu.:0.002101
## Median :0.003280 Median :0.0007613 Median :0.003686 Median :0.003988
## Mean :0.003371 Mean :0.0007878 Mean :0.003606 Mean :0.003521
## 3rd Qu.:0.003735 3rd Qu.:0.0008073 3rd Qu.:0.003858 3rd Qu.:0.004551
## Max. :0.004812 Max. :0.0014894 Max. :0.004340 Max. :0.005592
## CTGT GTTC CCGA GATA
## Min. :0.002329 Min. :0.002151 Min. :0.0003616 Min. :0.001348
## 1st Qu.:0.002864 1st Qu.:0.002356 1st Qu.:0.0007457 1st Qu.:0.002486
## Median :0.005629 Median :0.002397 Median :0.0007883 Median :0.002599
## Mean :0.004931 Mean :0.002408 Mean :0.0008184 Mean :0.002580
## 3rd Qu.:0.005971 3rd Qu.:0.002447 3rd Qu.:0.0008428 3rd Qu.:0.002707
## Max. :0.009707 Max. :0.003632 Max. :0.0017440 Max. :0.003838
## AGCG CGGG CATA AGCT
## Min. :0.0005254 Min. :0.0006588 Min. :0.003179 Min. :0.001549
## 1st Qu.:0.0005984 1st Qu.:0.0012456 1st Qu.:0.005235 1st Qu.:0.001870
## Median :0.0006273 Median :0.0012935 Median :0.005485 Median :0.003561
## Mean :0.0006486 Mean :0.0013664 Mean :0.005677 Mean :0.003071
## 3rd Qu.:0.0006736 3rd Qu.:0.0013792 3rd Qu.:0.006620 3rd Qu.:0.003650
## Max. :0.0012577 Max. :0.0033410 Max. :0.007298 Max. :0.004267
## TGCG ATAG ATGC TTGC
## Min. :0.0001903 Min. :0.001707 Min. :0.001868 Min. :0.0002764
## 1st Qu.:0.0005027 1st Qu.:0.002781 1st Qu.:0.002280 1st Qu.:0.0007983
## Median :0.0005331 Median :0.002965 Median :0.002341 Median :0.0022271
## Mean :0.0005476 Mean :0.002949 Mean :0.002462 Mean :0.0017803
## 3rd Qu.:0.0005587 3rd Qu.:0.003191 3rd Qu.:0.002756 3rd Qu.:0.0022753
## Max. :0.0009778 Max. :0.004920 Max. :0.003102 Max. :0.0024776
## TGTC TCAA GGTC TTCT
## Min. :0.002844 Min. :0.001474 Min. :0.002216 Min. :0.0006882
## 1st Qu.:0.004170 1st Qu.:0.003040 1st Qu.:0.002594 1st Qu.:0.0012814
## Median :0.004356 Median :0.004238 Median :0.002687 Median :0.0045398
## Mean :0.004321 Mean :0.003855 Mean :0.002751 Mean :0.0035484
## 3rd Qu.:0.004453 3rd Qu.:0.004445 3rd Qu.:0.002892 3rd Qu.:0.0047297
## Max. :0.005171 Max. :0.004628 Max. :0.003634 Max. :0.0055298
## TTTG GACT TTGT CACG
## Min. :0.0006146 Min. :0.002266 Min. :0.000503 Min. :0.0006426
## 1st Qu.:0.0017310 1st Qu.:0.002601 1st Qu.:0.001320 1st Qu.:0.0011636
## Median :0.0046628 Median :0.002689 Median :0.003545 Median :0.0012436
## Mean :0.0037387 Mean :0.002952 Mean :0.002859 Mean :0.0012650
## 3rd Qu.:0.0048058 3rd Qu.:0.003549 3rd Qu.:0.003691 3rd Qu.:0.0013430
## Max. :0.0051625 Max. :0.004275 Max. :0.004085 Max. :0.0021641
## CGTA CTTC ATTT CGTC
## Min. :0.0006038 Min. :0.002296 Min. :0.002098 Min. :0.0007170
## 1st Qu.:0.0008710 1st Qu.:0.003172 1st Qu.:0.002704 1st Qu.:0.0009202
## Median :0.0009004 Median :0.005309 Median :0.006143 Median :0.0009849
## Mean :0.0009690 Mean :0.004728 Mean :0.005188 Mean :0.0010024
## 3rd Qu.:0.0011404 3rd Qu.:0.005492 3rd Qu.:0.006839 3rd Qu.:0.0010753
## Max. :0.0014488 Max. :0.008466 Max. :0.012129 Max. :0.0016191
## GTAT TTGA TGGT GTAC
## Min. :0.001581 Min. :0.0004935 Min. :0.002294 Min. :0.001014
## 1st Qu.:0.002676 1st Qu.:0.0016770 1st Qu.:0.002883 1st Qu.:0.001341
## Median :0.002809 Median :0.0034272 Median :0.003532 Median :0.001381
## Mean :0.002842 Mean :0.0028896 Mean :0.003346 Mean :0.001428
## 3rd Qu.:0.003147 3rd Qu.:0.0035855 3rd Qu.:0.003643 3rd Qu.:0.001557
## Max. :0.005572 Max. :0.0039643 Max. :0.004377 Max. :0.002108
## TTAG TCGT CTAC TTGG
## Min. :0.0003805 Min. :8.181e-05 Min. :0.001289 Min. :0.0003397
## 1st Qu.:0.0010875 1st Qu.:1.696e-04 1st Qu.:0.001569 1st Qu.:0.0009718
## Median :0.0023496 Median :4.200e-04 Median :0.003060 Median :0.0026845
## Mean :0.0019530 Mean :3.476e-04 Mean :0.002676 Mean :0.0021643
## 3rd Qu.:0.0024648 3rd Qu.:4.349e-04 3rd Qu.:0.003221 3rd Qu.:0.0027672
## Max. :0.0029018 Max. :6.536e-04 Max. :0.007059 Max. :0.0033687
## ACGC CGCG TCAG ATAC
## Min. :0.0003036 Min. :0.0001145 Min. :0.001009 Min. :0.001491
## 1st Qu.:0.0003594 1st Qu.:0.0002623 1st Qu.:0.002899 1st Qu.:0.002290
## Median :0.0003832 Median :0.0002765 Median :0.003739 Median :0.002405
## Mean :0.0003914 Mean :0.0002997 Mean :0.003481 Mean :0.002401
## 3rd Qu.:0.0004123 3rd Qu.:0.0003013 3rd Qu.:0.003845 3rd Qu.:0.002622
## Max. :0.0007267 Max. :0.0009874 Max. :0.004673 Max. :0.004557
## TGGA GTAG TACT ACGT
## Min. :0.003977 Min. :0.002104 Min. :0.002230 Min. :0.0004811
## 1st Qu.:0.004958 1st Qu.:0.002202 1st Qu.:0.003179 1st Qu.:0.0005127
## Median :0.006041 Median :0.002245 Median :0.003483 Median :0.0005250
## Mean :0.005815 Mean :0.002376 Mean :0.003409 Mean :0.0005599
## 3rd Qu.:0.006398 3rd Qu.:0.002642 3rd Qu.:0.003636 3rd Qu.:0.0006371
## Max. :0.008511 Max. :0.003261 Max. :0.004307 Max. :0.0007101
## GATC ATCT CGGC
## Min. :0.0009177 Min. :0.0008322 Min. :0.0004467
## 1st Qu.:0.0018457 1st Qu.:0.0010949 1st Qu.:0.0008398
## Median :0.0019858 Median :0.0032574 Median :0.0008817
## Mean :0.0018882 Mean :0.0026068 Mean :0.0009242
## 3rd Qu.:0.0020487 3rd Qu.:0.0034151 3rd Qu.:0.0009554
## Max. :0.0035019 Max. :0.0056962 Max. :0.0022846
## CGAC CGAT ATCA ACCG
## Min. :0.0002929 Min. :0.000884 Min. :0.0006918 Min. :0.0004739
## 1st Qu.:0.0005518 1st Qu.:0.001026 1st Qu.:0.0009202 1st Qu.:0.0005649
## Median :0.0005705 Median :0.001060 Median :0.0032631 Median :0.0006138
## Mean :0.0005795 Mean :0.001075 Mean :0.0025481 Mean :0.0006232
## 3rd Qu.:0.0005975 3rd Qu.:0.001102 3rd Qu.:0.0034032 3rd Qu.:0.0006665
## Max. :0.0009464 Max. :0.001916 Max. :0.0048867 Max. :0.0010016
## TTCA GTCC GCCG TAGT
## Min. :0.0006472 Min. :0.001528 Min. :0.0005879 Min. :0.001512
## 1st Qu.:0.0012519 1st Qu.:0.001645 1st Qu.:0.0007359 1st Qu.:0.001841
## Median :0.0040287 Median :0.001741 Median :0.0008148 Median :0.002311
## Mean :0.0032096 Mean :0.001803 Mean :0.0008662 Mean :0.002161
## 3rd Qu.:0.0042349 3rd Qu.:0.001986 3rd Qu.:0.0009691 3rd Qu.:0.002413
## Max. :0.0057198 Max. :0.002193 Max. :0.0021263 Max. :0.002606
## ACGA GCGT CTCG
## Min. :0.0003693 Min. :0.0003422 Min. :0.0002895
## 1st Qu.:0.0004040 1st Qu.:0.0003990 1st Qu.:0.0004305
## Median :0.0004170 Median :0.0004292 Median :0.0009141
## Mean :0.0004211 Mean :0.0004485 Mean :0.0008130
## 3rd Qu.:0.0004366 3rd Qu.:0.0004932 3rd Qu.:0.0009866
## Max. :0.0005431 Max. :0.0008635 Max. :0.0023204
## TCCG TCGG GTCT TTCC
## Min. :0.0001150 Min. :5.283e-05 Min. :0.000874 Min. :0.0006142
## 1st Qu.:0.0002974 1st Qu.:1.149e-04 1st Qu.:0.001235 1st Qu.:0.0012657
## Median :0.0004463 Median :3.381e-04 Median :0.002919 Median :0.0035202
## Mean :0.0004227 Mean :2.827e-04 Mean :0.002404 Mean :0.0028661
## 3rd Qu.:0.0004742 3rd Qu.:3.560e-04 3rd Qu.:0.002987 3rd Qu.:0.0036743
## Max. :0.0009986 Max. :7.922e-04 Max. :0.004203 Max. :0.0043989
## TCGC GCGA GACG
## Min. :3.912e-05 Min. :0.0002849 Min. :0.0003813
## 1st Qu.:1.015e-04 1st Qu.:0.0003421 1st Qu.:0.0004607
## Median :2.830e-04 Median :0.0003618 Median :0.0005039
## Mean :2.396e-04 Mean :0.0003707 Mean :0.0005291
## 3rd Qu.:2.986e-04 3rd Qu.:0.0003896 3rd Qu.:0.0005982
## Max. :6.348e-04 Max. :0.0007549 Max. :0.0008905
## TTAC TCGA TACG
## Min. :0.0003610 Min. :9.825e-05 Min. :0.0002086
## 1st Qu.:0.0009079 1st Qu.:2.412e-04 1st Qu.:0.0003623
## Median :0.0021535 Median :6.637e-04 Median :0.0003698
## Mean :0.0017814 Mean :5.638e-04 Mean :0.0003706
## 3rd Qu.:0.0022875 3rd Qu.:7.262e-04 3rd Qu.:0.0003784
## Max. :0.0026693 Max. :1.823e-03 Max. :0.0004968
## TTCG GTCG ATCG
## Min. :5.807e-05 Min. :0.0002218 Min. :0.0001089
## 1st Qu.:1.659e-04 1st Qu.:0.0002531 1st Qu.:0.0001660
## Median :5.011e-04 Median :0.0002728 Median :0.0004993
## Mean :4.096e-04 Mean :0.0002817 Mean :0.0004114
## 3rd Qu.:5.246e-04 3rd Qu.:0.0003069 3rd Qu.:0.0005314
## Max. :1.094e-03 Max. :0.0004958 Max. :0.0012315
library(ggpubr)
library(rstatix)
library(tidyverse)
library(nortest)
# Fix the display/test order of the clinical groups: Healthy, then CLD, then HCC
data_clean[["Group"]] <- factor(
  data_clean[["Group"]],
  levels = c("Healthy", "CLD", "HCC")
)
# ============================================
# 1. NORMALITY TESTING
# ============================================
# Function to perform comprehensive normality tests
#' Run normality and outlier diagnostics for one biomarker column.
#'
#' Prints, in order: a banner, a Shapiro-Wilk test on the whole column, a
#' Shapiro-Wilk test within each level of `Group`, an Anderson-Darling test on
#' the whole column, and the rows flagged per group by `identify_outliers()`.
#'
#' @param data Data frame containing a `Group` column and the column named by
#'   `variable_name`.
#' @param variable_name Single string naming the numeric column to assess.
#' @return A list with `overall_normal` (pooled Shapiro-Wilk p >= 0.05),
#'   `group_normal` (every per-group Shapiro-Wilk p >= 0.05), `has_outliers`
#'   (at least one flagged row) and `recommendation` (the ANOVA string only
#'   when both normality flags are TRUE, otherwise the Kruskal-Wallis string).
test_normality <- function(data, variable_name) {
  # Fail early with a readable message instead of an opaque error raised from
  # inside shapiro.test() when a required column is absent.
  if (!is.character(variable_name) || length(variable_name) != 1L) {
    stop("`variable_name` must be a single column name.", call. = FALSE)
  }
  missing_cols <- setdiff(c("Group", variable_name), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `data`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Scalar p-value -> conclusion text shared by the pooled tests.
  describe_p <- function(p) {
    if (p < 0.05) {
      "NOT normally distributed (p < 0.05)"
    } else {
      "Normally distributed (p >= 0.05)"
    }
  }

  banner <- strrep("=", 50)
  cat("\n", banner, "\n")
  cat("NORMALITY TESTS FOR:", variable_name, "\n")
  cat(banner, "\n\n")

  # Get the variable
  var_data <- data[[variable_name]]

  # 1. Overall Shapiro-Wilk test
  # NOTE(review): this pools all groups into one sample; the result also feeds
  # the recommendation below. Confirm that a pooled test is intended.
  overall_shapiro <- shapiro.test(var_data)
  cat("Overall Shapiro-Wilk test:\n")
  cat(sprintf(" W = %.4f, p-value = %.4f\n",
              overall_shapiro$statistic, overall_shapiro$p.value))
  cat(sprintf(" Conclusion: %s\n\n", describe_p(overall_shapiro$p.value)))

  # 2. Shapiro-Wilk test by group
  # The test is run once per group and kept in a temporary list-column (`sw`);
  # the later summaries read from it instead of re-running the test.
  # `.data[[variable_name]]` restricts the lookup to columns of `data`.
  cat("Shapiro-Wilk test by group:\n")
  shapiro_by_group <- data %>%
    group_by(Group) %>%
    summarise(
      n = n(),
      sw = list(shapiro.test(.data[[variable_name]])),
      W_statistic = unname(sw[[1]]$statistic),
      p_value = sw[[1]]$p.value,
      normality = if (p_value < 0.05) "NOT Normal" else "Normal",
      .groups = "drop"
    )
  shapiro_by_group$sw <- NULL
  print(as.data.frame(shapiro_by_group))

  # 3. Anderson-Darling test (more sensitive to tails)
  ad_test <- ad.test(var_data)
  cat("\nAnderson-Darling test:\n")
  cat(sprintf(" A = %.4f, p-value = %.4f\n", ad_test$statistic, ad_test$p.value))
  cat(sprintf(" Conclusion: %s\n", describe_p(ad_test$p.value)))

  # 4. Check for outliers (flagged separately within each group)
  outliers <- data %>%
    group_by(Group) %>%
    identify_outliers(all_of(variable_name))
  cat("\nOutlier Detection:\n")
  if (nrow(outliers) > 0) {
    cat(sprintf(" Found %d outliers (%d extreme)\n",
                nrow(outliers),
                sum(outliers$is.extreme)))
    print(outliers %>%
            dplyr::select(Group, all_of(variable_name), is.outlier, is.extreme))
  } else {
    cat(" No outliers detected\n")
  }

  # Return results
  overall_normal <- overall_shapiro$p.value >= 0.05
  group_normal <- all(shapiro_by_group$p_value >= 0.05)
  list(
    overall_normal = overall_normal,
    group_normal = group_normal,
    has_outliers = nrow(outliers) > 0,
    recommendation = if (overall_normal && group_normal) {
      "Use parametric tests (ANOVA)"
    } else {
      "Use non-parametric tests (Kruskal-Wallis)"
    }
  )
}
# Test normality for all three biomarkers
# The list returned by test_normality() is kept in delfi_norm; the printed
# diagnostics are recorded in the "##" lines that follow.
# NOTE(review): only the DELFI_Score call is visible here, although the
# recorded output also covers TF_Score and mtcfDNA_fraction — the other calls
# appear to be missing from this flattened listing; confirm against the .Rmd.
delfi_norm <- test_normality(data_clean, "DELFI_Score")
##
## ==================================================
## NORMALITY TESTS FOR: DELFI_Score
## ==================================================
##
## Overall Shapiro-Wilk test:
## W = 0.7977, p-value = 0.0000
## Conclusion: NOT normally distributed (p < 0.05)
##
## Shapiro-Wilk test by group:
## Group n W_statistic p_value normality
## 1 Healthy 150 0.9897750 3.468464e-01 Normal
## 2 CLD 186 0.8528740 2.016845e-12 NOT Normal
## 3 HCC 139 0.8641797 5.804626e-10 NOT Normal
##
## Anderson-Darling test:
## A = 20.9481, p-value = 0.0000
## Conclusion: NOT normally distributed (p < 0.05)
##
## Outlier Detection:
## Found 18 outliers (3 extreme)
## # A tibble: 18 × 4
## Group DELFI_Score is.outlier is.extreme
## <fct> <dbl> <lgl> <lgl>
## 1 Healthy 0.0921 TRUE FALSE
## 2 CLD 0.437 TRUE FALSE
## 3 CLD 0.404 TRUE FALSE
## 4 CLD 0.410 TRUE FALSE
## 5 CLD 0.411 TRUE FALSE
## 6 CLD 0.464 TRUE FALSE
## 7 CLD 0.703 TRUE TRUE
## 8 CLD 0.459 TRUE FALSE
## 9 CLD 0.446 TRUE FALSE
## 10 HCC 0.583 TRUE FALSE
## 11 HCC 0.903 TRUE TRUE
## 12 HCC 0.623 TRUE FALSE
## 13 HCC 0.647 TRUE FALSE
## 14 HCC 0.677 TRUE FALSE
## 15 HCC 0.930 TRUE TRUE
## 16 HCC 0.622 TRUE FALSE
## 17 HCC 0.609 TRUE FALSE
## 18 HCC 0.649 TRUE FALSE
##
## ==================================================
## NORMALITY TESTS FOR: TF_Score
## ==================================================
##
## Overall Shapiro-Wilk test:
## W = 0.5058, p-value = 0.0000
## Conclusion: NOT normally distributed (p < 0.05)
##
## Shapiro-Wilk test by group:
## Group n W_statistic p_value normality
## 1 Healthy 150 0.6584843 3.472946e-17 NOT Normal
## 2 CLD 186 0.4059785 1.925713e-24 NOT Normal
## 3 HCC 139 0.7732059 2.190308e-13 NOT Normal
##
## Anderson-Darling test:
## A = 83.9776, p-value = 0.0000
## Conclusion: NOT normally distributed (p < 0.05)
##
## Outlier Detection:
## Found 44 outliers (20 extreme)
## # A tibble: 44 × 4
## Group TF_Score is.outlier is.extreme
## <fct> <dbl> <lgl> <lgl>
## 1 Healthy 0.00251 TRUE FALSE
## 2 Healthy 0.00264 TRUE FALSE
## 3 Healthy 0.00256 TRUE FALSE
## 4 Healthy 0.00211 TRUE FALSE
## 5 Healthy 0.00444 TRUE TRUE
## 6 Healthy 0.00307 TRUE TRUE
## 7 Healthy 0.00249 TRUE FALSE
## 8 Healthy 0.00219 TRUE FALSE
## 9 Healthy 0.00410 TRUE TRUE
## 10 Healthy 0.00254 TRUE FALSE
## # ℹ 34 more rows
##
## ==================================================
## NORMALITY TESTS FOR: mtcfDNA_fraction
## ==================================================
##
## Overall Shapiro-Wilk test:
## W = 0.5517, p-value = 0.0000
## Conclusion: NOT normally distributed (p < 0.05)
##
## Shapiro-Wilk test by group:
## Group n W_statistic p_value normality
## 1 Healthy 150 0.8531280 6.176345e-11 NOT Normal
## 2 CLD 186 0.7973768 8.620336e-15 NOT Normal
## 3 HCC 139 0.5804583 3.009384e-18 NOT Normal
##
## Anderson-Darling test:
## A = 47.5761, p-value = 0.0000
## Conclusion: NOT normally distributed (p < 0.05)
##
## Outlier Detection:
## Found 31 outliers (12 extreme)
## # A tibble: 31 × 4
## Group mtcfDNA_fraction is.outlier is.extreme
## <fct> <dbl> <lgl> <lgl>
## 1 Healthy 0.00327 TRUE TRUE
## 2 Healthy 0.00315 TRUE TRUE
## 3 Healthy 0.00272 TRUE FALSE
## 4 Healthy 0.00229 TRUE FALSE
## 5 Healthy 0.00296 TRUE FALSE
## 6 Healthy 0.00221 TRUE FALSE
## 7 Healthy 0.00233 TRUE FALSE
## 8 Healthy 0.00256 TRUE FALSE
## 9 Healthy 0.00268 TRUE FALSE
## 10 Healthy 0.00228 TRUE FALSE
## # ℹ 21 more rows
##
## ==================================================
## STATISTICAL TEST RECOMMENDATIONS
## ==================================================
## DELFI_Score: Use non-parametric tests (Kruskal-Wallis)
## TF_Score: Use non-parametric tests (Kruskal-Wallis)
## mtcfDNA_fraction: Use non-parametric tests (Kruskal-Wallis)
🕵🏻 The Shapiro-Wilk test assesses the null hypothesis that a sample came from a normally distributed population. A small p-value (typically < 0.05) means you reject the null hypothesis and conclude the data is not normally distributed.
For DELFI_Score, the group-wise results are:
CLD: The p-value is 2.02 × 10⁻¹², which is far below 0.05. These data are not normally distributed.
HCC: The p-value is 5.80 × 10⁻¹⁰, also far below 0.05. These data are also not normally distributed.
Healthy: The p-value is 0.347, which is greater than 0.05. These data are consistent with a normal distribution.
Since a parametric test like a one-way ANOVA requires that the data in all groups be approximately normally distributed, it is not suitable for my analysis. Two of my three groups violate this critical assumption.
Therefore, the correct choice is Kruskal-Wallis test.
# ============================================
# 2. VISUAL ASSESSMENT OF NORMALITY
# ============================================
# Function to create normality diagnostic plots
#' Build a three-panel normality figure for one biomarker.
#'
#' Stacks, top to bottom, a per-group Q-Q plot, a per-group histogram and a
#' grouped boxplot with jittered points, under a shared bold title.
#'
#' @param data Data frame with a `Group` column and the column named by
#'   `variable_name`.
#' @param variable_name String naming the column to plot.
#' @return The combined patchwork figure.
create_normality_plots <- function(data, variable_name) {
  # One colour per group, reused by all three panels
  group_colours <- c("#4CAF50", "#FFC107", "#F44336")

  # Panel 1: Q-Q plot, one facet per group
  qq_panel <- ggqqplot(
    data,
    x = variable_name,
    color = "Group",
    palette = group_colours,
    facet.by = "Group"
  )
  qq_panel <- qq_panel + labs(
    title = paste0("Q-Q Plot: ", variable_name),
    subtitle = "Points should follow the diagonal line for normal distribution"
  )

  # Panel 2: histogram with density curve, one facet per group
  hist_panel <- gghistogram(
    data,
    x = variable_name,
    fill = "Group",
    palette = group_colours,
    facet.by = "Group",
    add_density = TRUE,
    bins = 15
  )
  hist_panel <- hist_panel + labs(
    title = paste0("Histogram: ", variable_name),
    subtitle = "Check for bell-shaped distribution"
  )

  # Panel 3: boxplot with jittered points; the legend is hidden
  box_panel <- ggboxplot(
    data,
    x = "Group",
    y = variable_name,
    fill = "Group",
    palette = group_colours,
    add = "jitter",
    add.params = list(alpha = 0.3)
  )
  box_panel <- box_panel +
    labs(
      title = paste0("Boxplot: ", variable_name),
      subtitle = "Check for outliers and symmetry"
    ) +
    theme(legend.position = "none")

  # patchwork is attached here; it supplies `/` for stacking and
  # plot_annotation() for the shared title
  library(patchwork)
  stacked <- qq_panel / hist_panel / box_panel
  stacked + plot_annotation(
    title = paste0("Normality Assessment for ", variable_name),
    theme = theme(plot.title = element_text(size = 16, face = "bold"))
  )
}
# Create normality plots for each biomarker
delfi_norm_plots <- create_normality_plots(data_clean, "DELFI_Score")
tf_norm_plots <- create_normality_plots(data_clean, "TF_Score")
mt_norm_plots <- create_normality_plots(data_clean, "mtcfDNA_fraction")
# Display and save plots
# Each print() and ggsave() is its own statement: two calls written back to
# back on one line without a separator do not parse.
print(delfi_norm_plots)
ggsave("delfi_normality_assessment.png", delfi_norm_plots, width = 12, height = 14, dpi = 300)
print(tf_norm_plots)
ggsave("tf_normality_assessment.png", tf_norm_plots, width = 12, height = 14, dpi = 300)
print(mt_norm_plots)
🕵🏻 The Shapiro-Wilk test assesses the null hypothesis that a sample came from a normally distributed population. A small p-value (typically < 0.05) means you reject the null hypothesis and conclude the data is not normally distributed.
For DELFI_Score, the group-wise results are:
CLD: The p-value is 2.02 × 10⁻¹², which is far below 0.05. These data are not normally distributed.
HCC: The p-value is 5.80 × 10⁻¹⁰, also far below 0.05. These data are also not normally distributed.
Healthy: The p-value is 0.347, which is greater than 0.05. These data are consistent with a normal distribution.
Since a parametric test like a one-way ANOVA requires that the data in all groups be approximately normally distributed, it is not suitable for my analysis. Two of my three groups violate this critical assumption.
Therefore, the correct choice is Kruskal-Wallis test.
# ============================================
# 3. HOMOGENEITY OF VARIANCE TESTING
# ============================================
# Section banner: a newline followed by a rule of 50 "=" characters.
# Lines prefixed with "##" are the output recorded when the report was run.
cat("\n", paste(rep("=", 50), collapse=""), "\n")
##
## ==================================================
## HOMOGENEITY OF VARIANCE TESTS
## ==================================================
# NOTE(review): the recorded output contains text that no statement visible in
# this section prints (the section title, the H0 line, the per-biomarker
# headings and tables) — presumably those cat()/print() calls were dropped
# when the rendered report was flattened; confirm against the original .Rmd.
# Levene's test for each biomarker
# Each result is stored for the conclusions below; the recorded output shows a
# one-row table with df1, df2, statistic and p.
levene_delfi <- data_clean %>% levene_test(DELFI_Score ~ Group)
levene_tf <- data_clean %>% levene_test(TF_Score ~ Group)
levene_mt <- data_clean %>% levene_test(mtcfDNA_fraction ~ Group)
cat("Levene's Test Results:\n")
## Levene's Test Results:
## (H0: variances are equal across groups)
## DELFI_Score:
## # A tibble: 1 × 4
## df1 df2 statistic p
## <int> <int> <dbl> <dbl>
## 1 2 472 40.7 4.83e-17
# Conclusion for DELFI_Score: p < 0.05 selects the "NOT equal" message.
cat(sprintf("Conclusion: %s\n\n",
ifelse(levene_delfi$p < 0.05,
"Variances are NOT equal (use Welch's ANOVA or non-parametric)",
"Variances are equal (can use standard ANOVA if normal)")))
## Conclusion: Variances are NOT equal (use Welch's ANOVA or non-parametric)
## TF_Score:
## # A tibble: 1 × 4
## df1 df2 statistic p
## <int> <int> <dbl> <dbl>
## 1 2 472 94.5 2.95e-35
# Conclusion for TF_Score, same rule as above.
cat(sprintf("Conclusion: %s\n\n",
ifelse(levene_tf$p < 0.05,
"Variances are NOT equal (use Welch's ANOVA or non-parametric)",
"Variances are equal (can use standard ANOVA if normal)")))
## Conclusion: Variances are NOT equal (use Welch's ANOVA or non-parametric)
## mtcfDNA_fraction:
## # A tibble: 1 × 4
## df1 df2 statistic p
## <int> <int> <dbl> <dbl>
## 1 2 472 26.9 8.82e-12
# Conclusion for mtcfDNA_fraction, same rule as above.
cat(sprintf("Conclusion: %s\n\n",
ifelse(levene_mt$p < 0.05,
"Variances are NOT equal (use Welch's ANOVA or non-parametric)",
"Variances are equal (can use standard ANOVA if normal)")))
## Conclusion: Variances are NOT equal (use Welch's ANOVA or non-parametric)
##
## ----------------------------------------
## Decision for DELFI_Score :
## ----------------------------------------
## ✗ Data is NOT normally distributed
## → RECOMMENDATION: Use Kruskal-Wallis with Dunn's post-hoc
##
## ----------------------------------------
## Decision for TF_Score :
## ----------------------------------------
## ✗ Data is NOT normally distributed
## → RECOMMENDATION: Use Kruskal-Wallis with Dunn's post-hoc
##
## ----------------------------------------
## Decision for mtcfDNA_fraction :
## ----------------------------------------
## ✗ Data is NOT normally distributed
## → RECOMMENDATION: Use Kruskal-Wallis with Dunn's post-hoc
##
##
## ==================================================
## FINAL STATISTICAL TEST RECOMMENDATIONS
## ==================================================
## Biomarker Normal_Distribution Equal_Variances Has_Outliers
## 1 DELFI_Score FALSE FALSE TRUE
## 2 TF_Score FALSE FALSE TRUE
## 3 mtcfDNA_fraction FALSE FALSE TRUE
## Recommended_Test
## 1 Kruskal-Wallis
## 2 Kruskal-Wallis
## 3 Kruskal-Wallis
# ============================================
# 5. APPLY APPROPRIATE STATISTICAL TESTS
# ============================================
#' Run the omnibus test selected for a biomarker, followed by its post-hoc test.
#'
#' Prints a banner naming the variable and `test_type`, then the test results.
#' `"ANOVA"` runs one-way ANOVA with Tukey's HSD; `"Welch"` runs Welch's ANOVA
#' with Games-Howell; any other value runs Kruskal-Wallis with Dunn's test
#' (Bonferroni-adjusted).
#'
#' @param data Data frame with a `Group` column and the column named by
#'   `variable_name`.
#' @param variable_name String naming the response column.
#' @param test_type String selecting the analysis (see above).
#' @return The value of the final `print()` call in the chosen branch.
perform_appropriate_analysis <- function(data, variable_name, test_type) {
  rule <- strrep("=", 50)
  cat("\n", rule, "\n")
  cat("ANALYSIS FOR", variable_name, "using", test_type, "\n")
  cat(rule, "\n\n")

  if (test_type == "ANOVA") {
    # Parametric route. The aov() call is written out in full because the
    # Tukey printout echoes the fitting call on its "Fit:" line.
    anova_fit <- aov(as.formula(paste(variable_name, "~ Group")), data = data)
    cat("One-way ANOVA:\n")
    print(summary(anova_fit))

    pairwise <- TukeyHSD(anova_fit)
    cat("\nTukey's HSD Post-hoc:\n")
    print(pairwise)
  } else if (test_type == "Welch") {
    # Unequal-variance route: Welch's ANOVA, then Games-Howell comparisons
    group_formula <- as.formula(paste(variable_name, "~ Group"))
    welch_fit <- oneway.test(group_formula, data = data, var.equal = FALSE)
    cat("Welch's ANOVA:\n")
    print(welch_fit)

    library(rstatix)
    pairwise <- games_howell_test(data, group_formula)
    cat("\nGames-Howell Post-hoc:\n")
    print(pairwise)
  } else {
    # Fallback for every other test_type: rank-based Kruskal-Wallis, then
    # Dunn's pairwise comparisons with Bonferroni adjustment
    group_formula <- as.formula(paste(variable_name, "~ Group"))
    kw_fit <- kruskal.test(group_formula, data = data)
    cat("Kruskal-Wallis test:\n")
    print(kw_fit)

    pairwise <- dunn_test(data, group_formula, p.adjust.method = "bonferroni")
    cat("\nDunn's Post-hoc (Bonferroni adjusted):\n")
    print(pairwise)
  }
}
# Apply appropriate tests based on decisions
# `delfi_decision` is not defined in this part of the file; the banner in the
# recorded output below shows it held "Kruskal-Wallis" for this run.
# NOTE(review): the recorded output also contains TF_Score and
# mtcfDNA_fraction analyses whose calls are not visible here — confirm against
# the original .Rmd.
perform_appropriate_analysis(data_clean, "DELFI_Score", delfi_decision)
##
## ==================================================
## ANALYSIS FOR DELFI_Score using Kruskal-Wallis
## ==================================================
##
## Kruskal-Wallis test:
##
## Kruskal-Wallis rank sum test
##
## data: DELFI_Score by Group
## Kruskal-Wallis chi-squared = 166.62, df = 2, p-value < 2.2e-16
##
##
## Dunn's Post-hoc (Bonferroni adjusted):
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 DELFI_Score Healt… CLD 150 186 10.2 1.91e-24 5.72e-24 ****
## 2 DELFI_Score Healt… HCC 150 139 12.0 2.26e-33 6.78e-33 ****
## 3 DELFI_Score CLD HCC 186 139 2.65 7.98e- 3 2.40e- 2 *
##
## ==================================================
## ANALYSIS FOR TF_Score using Kruskal-Wallis
## ==================================================
##
## Kruskal-Wallis test:
##
## Kruskal-Wallis rank sum test
##
## data: TF_Score by Group
## Kruskal-Wallis chi-squared = 370.14, df = 2, p-value < 2.2e-16
##
##
## Dunn's Post-hoc (Bonferroni adjusted):
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 TF_Score Healthy CLD 150 186 12.3 9.43e-35 2.83e-34 ****
## 2 TF_Score Healthy HCC 150 139 19.0 1.18e-80 3.54e-80 ****
## 3 TF_Score CLD HCC 186 139 7.94 2.09e-15 6.28e-15 ****
##
## ==================================================
## ANALYSIS FOR mtcfDNA_fraction using Kruskal-Wallis
## ==================================================
##
## Kruskal-Wallis test:
##
## Kruskal-Wallis rank sum test
##
## data: mtcfDNA_fraction by Group
## Kruskal-Wallis chi-squared = 234.91, df = 2, p-value < 2.2e-16
##
##
## Dunn's Post-hoc (Bonferroni adjusted):
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 mtcfDNA_fr… Healt… CLD 150 186 11.5 2.18e-30 6.53e-30 ****
## 2 mtcfDNA_fr… Healt… HCC 150 139 14.6 1.74e-48 5.22e-48 ****
## 3 mtcfDNA_fr… CLD HCC 186 139 4.15 3.30e- 5 9.90e- 5 ****
library(ggpubr)
library(rstatix)
library(tidyverse)
library(patchwork)
# Keep the groups in the order Healthy, CLD, HCC for the plots below
data_clean[["Group"]] <- factor(
  data_clean[["Group"]],
  levels = c("Healthy", "CLD", "HCC")
)
# ============================================
# 1. DELFI_Score Boxplot
# ============================================
# Perform Dunn's test for DELFI_Score
# add_significance() appends the significance symbols used as bracket labels;
# add_xy_position() appends the bracket coordinates used by
# stat_pvalue_manual() below.
dunn_delfi <- data_clean %>%
  dunn_test(DELFI_Score ~ Group, p.adjust.method = "bonferroni") %>%
  add_significance() %>%
  add_xy_position(x = "Group", step.increase = 0.08)
# Create boxplot for DELFI_Score
# NOTE(review): the subtitle hard-codes the Kruskal-Wallis p-value; it matches
# the test output recorded earlier in this report and must be updated by hand
# if the data change.
boxplot_delfi <- ggboxplot(data_clean,
                           x = "Group",
                           y = "DELFI_Score",
                           fill = "Group",
                           palette = c("#4CAF50", "#FFC107", "#F44336"),
                           add = "jitter",
                           add.params = list(alpha = 0.3, size = 1.5)) +
  stat_pvalue_manual(dunn_delfi,
                     label = "p.adj.signif",
                     tip.length = 0.01,
                     hide.ns = FALSE,
                     size = 4) +
  labs(title = "DELFI Score Distribution by Group",
       subtitle = "Kruskal-Wallis: p < 2.2e-16 | Post-hoc: Dunn's test (Bonferroni adjusted)",
       y = "DELFI Score",
       x = "Group") +
  # Extra head-room (15%) above the data leaves space for the brackets
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  theme_classic() +
  theme(legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
        plot.subtitle = element_text(hjust = 0.5, size = 10, face = "italic"),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 11))
# Display, then save. These are two separate statements: written back to back
# on one line without a separator they do not parse.
print(boxplot_delfi)
ggsave("delfi_score_boxplot_final.png", boxplot_delfi, width = 8, height = 6, dpi = 300)
# ============================================
# 2. TF_Score Boxplot
# ============================================
# Perform Dunn's test for TF_Score
dunn_tf <- data_clean %>%
dunn_test(TF_Score ~ Group, p.adjust.method = "bonferroni") %>%
add_significance() %>%
add_xy_position(x = "Group", step.increase = 0.08)
# Create boxplot for TF_Score
boxplot_tf <- ggboxplot(data_clean,
x = "Group",
y = "TF_Score",
fill = "Group",
palette = c("#4CAF50", "#FFC107", "#F44336"),
add = "jitter",
add.params = list(alpha = 0.3, size = 1.5)) +
stat_pvalue_manual(dunn_tf,
label = "p.adj.signif",
tip.length = 0.01,
hide.ns = FALSE,
size = 4) +
labs(title = "TF Score Distribution by Group",
subtitle = "Kruskal-Wallis: p < 2.2e-16 | Post-hoc: Dunn's test (Bonferroni adjusted)",
y = "TF Score",
x = "Group") +
scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
theme_classic() +
theme(legend.position = "none",
plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
plot.subtitle = element_text(hjust = 0.5, size = 10, face = "italic"),
axis.title = element_text(size = 12),
axis.text = element_text(size = 11))
print(boxplot_tf)ggsave("tf_score_boxplot_final.png", boxplot_tf, width = 8, height = 6, dpi = 300)
# ============================================
# 3. mtcfDNA_fraction Boxplot
# ============================================
# Perform Dunn's test for mtcfDNA_fraction
dunn_mt <- data_clean %>%
dunn_test(mtcfDNA_fraction ~ Group, p.adjust.method = "bonferroni") %>%
add_significance() %>%
add_xy_position(x = "Group", step.increase = 0.08)
# Create boxplot for mtcfDNA_fraction
boxplot_mt <- ggboxplot(data_clean,
x = "Group",
y = "mtcfDNA_fraction",
fill = "Group",
palette = c("#4CAF50", "#FFC107", "#F44336"),
add = "jitter",
add.params = list(alpha = 0.3, size = 1.5)) +
stat_pvalue_manual(dunn_mt,
label = "p.adj.signif",
tip.length = 0.01,
hide.ns = FALSE,
size = 4) +
labs(title = "mtcfDNA Fraction Distribution by Group",
subtitle = "Kruskal-Wallis: p < 2.2e-16 | Post-hoc: Dunn's test (Bonferroni adjusted)",
y = "mtcfDNA Fraction",
x = "Group") +
scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
theme_classic() +
theme(legend.position = "none",
plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
plot.subtitle = element_text(hjust = 0.5, size = 10, face = "italic"),
axis.title = element_text(size = 12),
axis.text = element_text(size = 11))
print(boxplot_mt)ggsave("mtcfdna_fraction_boxplot_final.png", boxplot_mt, width = 8, height = 6, dpi = 300)
# ============================================
# 4. Create Summary Table
# ============================================
# Stack the Bonferroni-adjusted Dunn p-values of the three biomarkers into one
# table with one row per biomarker and pairwise comparison.
summary_pvalues <- bind_rows(
  dunn_delfi %>%
    select(group1, group2, p.adj, p.adj.signif) %>%
    mutate(Biomarker = "DELFI_Score"),
  dunn_tf %>%
    select(group1, group2, p.adj, p.adj.signif) %>%
    mutate(Biomarker = "TF_Score"),
  dunn_mt %>%
    select(group1, group2, p.adj, p.adj.signif) %>%
    mutate(Biomarker = "mtcfDNA_fraction")
) %>%
  mutate(Comparison = paste(group1, "vs", group2),
         # Display formatting only: p.adj is a character column from here on
         p.adj = format(p.adj, scientific = TRUE, digits = 3)) %>%
  select(Biomarker, Comparison, p.adj, p.adj.signif) %>%
  arrange(Biomarker, Comparison)
print("Summary of Post-hoc Comparisons:")
# The recorded output below shows the table itself, so print it explicitly
print(summary_pvalues)
## [1] "Summary of Post-hoc Comparisons:"
## # A tibble: 9 × 4
## Biomarker Comparison p.adj p.adj.signif
## <chr> <chr> <chr> <chr>
## 1 DELFI_Score CLD vs HCC 2.40e-02 *
## 2 DELFI_Score Healthy vs CLD 5.72e-24 ****
## 3 DELFI_Score Healthy vs HCC 6.78e-33 ****
## 4 TF_Score CLD vs HCC 6.28e-15 ****
## 5 TF_Score Healthy vs CLD 2.83e-34 ****
## 6 TF_Score Healthy vs HCC 3.54e-80 ****
## 7 mtcfDNA_fraction CLD vs HCC 9.90e-05 ****
## 8 mtcfDNA_fraction Healthy vs CLD 6.53e-30 ****
## 9 mtcfDNA_fraction Healthy vs HCC 5.22e-48 ****
# ================================
# Panel-tagged boxplots with clear brackets
# ================================
library(ggpubr)
library(rstatix)
library(tidyverse)
library(patchwork)
# Ensure groups are ordered
data_clean$Group <- factor(data_clean$Group, levels = c("Healthy", "CLD", "HCC"))
# Helper to nudge bracket heights upward.
#   df        data frame of test results; only a `y.position` column is touched
#   y_buffer  fraction of each bracket's own height to add (0.12 = 12 %)
# Returns `df` unchanged when it has no `y.position` column. The shift uses
# abs(y.position), so brackets sitting below zero are still moved up rather
# than pushed further down; for positive heights the result equals
# y.position * (1 + y_buffer).
raise_brackets <- function(df, y_buffer = 0.12) {
  if (!"y.position" %in% names(df)) {
    return(df)
  }
  heights <- df[["y.position"]]
  df[["y.position"]] <- heights + abs(heights) * y_buffer
  df
}
# -------------------------------
# Shared helpers for the panel plots
# -------------------------------
# Dunn's post-hoc test (Bonferroni-adjusted) for one biomarker column against
# `Group`, with widely spaced bracket positions that are then lifted by
# raise_brackets() so they clear the jittered points.
#   data      data frame holding `Group` and the biomarker column
#   biomarker name of the biomarker column (character scalar)
dunn_with_raised_brackets <- function(data, biomarker) {
  data %>%
    dunn_test(reformulate("Group", response = biomarker),
              p.adjust.method = "bonferroni") %>%
    add_significance() %>%
    add_xy_position(x = "Group", step.increase = 0.18) %>%
    raise_brackets(y_buffer = 0.15)
}

# Publication-style boxplot for one biomarker: no title, no x label, large
# axis text, outlier glyphs hidden because the jitter layer already draws
# every observation.
#   data      data frame holding `Group` and the biomarker column
#   biomarker name of the biomarker column (character scalar)
#   posthoc   result of dunn_with_raised_brackets() for the same biomarker
#   y_label   y-axis label
make_enhanced_boxplot <- function(data, biomarker, posthoc, y_label) {
  ggboxplot(
    data, x = "Group", y = biomarker,
    fill = "Group",
    palette = c("#4CAF50", "#FFC107", "#F44336"),
    alpha = 0.8, width = 0.7,
    add = "jitter",
    add.params = list(alpha = 0.4, size = 1.2, color = "darkgray"),
    outlier.shape = NA
  ) +
    stat_pvalue_manual(
      posthoc,
      label = "p.adj.signif",
      tip.length = 0.02,
      hide.ns = FALSE,
      size = 5.5,
      bracket.size = 1.2
    ) +
    labs(title = "", y = y_label, x = "") +
    scale_y_continuous(
      # Generous top expansion leaves room for the raised brackets
      expand = expansion(mult = c(0.05, 0.22)),
      breaks = scales::pretty_breaks(n = 6)
    ) +
    theme_classic(base_size = 11) +
    theme(
      legend.position = "none",
      plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
      axis.title.y = element_text(size = 16),
      axis.text = element_text(size = 15),
      axis.text.x = element_text(size = 15)
    )
}

# -------------------------------
# 1) DELFI Score
# -------------------------------
dunn_delfi <- dunn_with_raised_brackets(data_clean, "DELFI_Score")
boxplot_delfi_enhanced <- make_enhanced_boxplot(
  data_clean, "DELFI_Score", dunn_delfi, y_label = "DELFI Score"
)

# -------------------------------
# 2) TF Score
# -------------------------------
dunn_tf <- dunn_with_raised_brackets(data_clean, "TF_Score")
boxplot_tf_enhanced <- make_enhanced_boxplot(
  data_clean, "TF_Score", dunn_tf, y_label = "TF Score"
)

# -------------------------------
# 3) mtcfDNA Fraction
# -------------------------------
dunn_mt <- dunn_with_raised_brackets(data_clean, "mtcfDNA_fraction")
boxplot_mt_enhanced <- make_enhanced_boxplot(
  data_clean, "mtcfDNA_fraction", dunn_mt, y_label = "mtcfDNA Fraction"
)
# -------------------------------
# 4) Summary statistics table
# -------------------------------
# Descriptive statistics per biomarker and group: the three biomarker columns
# are stacked into long form, summarised, and rounded to 4 decimals.
# NOTE(review): no na.rm is used, so any missing value in a biomarker column
# turns that group's statistics into NA; assumes data_clean has none.
summary_stats <- data_clean %>%
  pivot_longer(
    cols = c(DELFI_Score, TF_Score, mtcfDNA_fraction),
    names_to = "Biomarker", values_to = "Value"
  ) %>%
  group_by(Biomarker, Group) %>%
  summarise(
    n = n(),
    Mean = mean(Value),
    SD = sd(Value),
    Median = median(Value),
    Q1 = quantile(Value, 0.25),
    Q3 = quantile(Value, 0.75),
    Min = min(Value),
    Max = max(Value),
    .groups = "drop"
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 4)))
print("Summary Statistics by Biomarker and Group:")
# The recorded output below shows the table itself, so print it explicitly
print(summary_stats)
## [1] "Summary Statistics by Biomarker and Group:"
## # A tibble: 9 × 10
## Biomarker Group n Mean SD Median Q1 Q3 Min Max
## <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 DELFI_Score Healt… 150 0.194 0.0316 0.190 0.170 0.219 0.0921 0.267
## 2 DELFI_Score CLD 186 0.264 0.0681 0.248 0.220 0.292 0.165 0.703
## 3 DELFI_Score HCC 139 0.318 0.136 0.287 0.231 0.369 0.124 0.93
## 4 TF_Score Healt… 150 0.0014 0.0005 0.0013 0.0011 0.0015 0.001 0.0044
## 5 TF_Score CLD 186 0.0151 0.0178 0.0107 0.0096 0.0119 0.0034 0.151
## 6 TF_Score HCC 139 0.0856 0.0951 0.0514 0.0138 0.127 0.0056 0.569
## 7 mtcfDNA_fraction Healt… 150 0.0011 0.0006 0.0009 0.0007 0.0013 0.0003 0.0033
## 8 mtcfDNA_fraction CLD 186 0.0039 0.0032 0.0029 0.0017 0.0049 0.0005 0.0226
## 9 mtcfDNA_fraction HCC 139 0.0068 0.0078 0.0046 0.0028 0.0075 0.0008 0.0699
write.csv(summary_stats, "biomarker_summary_statistics.csv", row.names = FALSE)
# -------------------------------
# 5) Combine with A/B/C panel tags
# -------------------------------
# Three panels side by side with equal widths, tagged A, B, C.
# The tag styling is applied with `& theme(...)`: patchwork only honours
# title/subtitle/caption, margin and background elements from the `theme`
# argument of plot_annotation(), so `plot.tag` given there has no effect.
# Legends are hidden on every panel, so no legend text styling is needed.
combined_with_legend <-
  boxplot_delfi_enhanced +
  boxplot_tf_enhanced +
  boxplot_mt_enhanced +
  plot_layout(ncol = 3, widths = c(1, 1, 1), guides = "collect") +
  plot_annotation(
    tag_levels = "A", # adds A, B, C
    title = "",
    subtitle = "",
    caption = "",
    theme = theme(
      plot.caption = element_text(size = 18, hjust = 0.5, color = "gray40")
    )
  ) &
  theme(
    legend.position = "none",
    plot.tag = element_text(face = "bold", size = 28, hjust = -0.2, vjust = 1.2)
  )
print(combined_with_legend)
# Load required libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(reshape2) # For melt function
library(pheatmap)
library(viridis)
library(RColorBrewer)
library(gridExtra)
library(dendextend)
# Load the end-motif frequency table (one row per sample)
my_data <- read.csv(
  file = "/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 3 (Diagnostic Indicators for HCC)/HCC_Diagnostic_Tables/EndMotif_only.csv",
  stringsAsFactors = FALSE
)
# Check column names
print("Column names in data:")
## [1] "Column names in data:"
## [1] "Sample_ID" "Group" "AACC" "ACCC" "CCCT"
# Pick the sample identifier: "Sample" wins over "Sample_ID"; if neither
# exists, fall back to the first column of the table
known_id_cols <- intersect(c("Sample", "Sample_ID"), names(my_data))
id_col <- if (length(known_id_cols) > 0) known_id_cols[1] else names(my_data)[1]
print(paste("Using ID column:", id_col))
## [1] "Using ID column: Sample_ID"
# Downstream code always refers to Sample_ID, so mirror the ID column there
if (id_col != "Sample_ID") {
  my_data$Sample_ID <- my_data[[id_col]]
}
# Check what groups are present in the data
print("Groups present in the data:")
## [1] "Groups present in the data:"
##
## CLD HCC Healthy
## 186 139 150
# Warn (without stopping) when any of the three cohorts is absent
missing_groups <- setdiff(c("HCC", "CLD", "Healthy"), my_data$Group)
if (length(missing_groups) > 0) {
  warning("Not all three groups (HCC, CLD, Healthy) are present in the data!")
  print("Available groups:")
  print(unique(my_data$Group))
}
# ========== STATISTICAL ANALYSIS ==========
# Every column that is not sample metadata is treated as a motif column
metadata_cols <- c("Sample_ID", "Group", id_col)
motif_cols <- names(my_data)[!names(my_data) %in% metadata_cols]
print(paste("Number of motif columns found:", length(motif_cols)))
## [1] "Number of motif columns found: 256"
## [1] "First 5 motif columns:"
## [1] "AAAA" "AAAC" "AAAG" "AAAT" "AACA"
# Long format: one row per sample x motif
data_melted <- pivot_longer(
  select(my_data, Sample_ID, Group, all_of(motif_cols)),
  cols = all_of(motif_cols),
  names_to = "Motif",
  values_to = "Frequency"
)
# Statistical analysis for each motif

# One-way ANOVA p-value of `frequency` across `group`.
# Returns NA when there are fewer than two groups, three or fewer
# observations, or when the model fit raises an error.
motif_anova_p <- function(frequency, group) {
  if (length(unique(group)) <= 1 || length(frequency) <= 3) {
    return(NA_real_)
  }
  tryCatch({
    fit <- aov(frequency ~ group)
    summary(fit)[[1]][["Pr(>F)"]][1]
  }, error = function(e) NA_real_)
}

# Two-sample t-test p-value (t.test defaults); NA when the test cannot be run.
motif_ttest_p <- function(x, y) {
  tryCatch(t.test(x, y)$p.value, error = function(e) NA_real_)
}

motif_statistics <- data_melted %>%
  group_by(Motif) %>%
  summarise(
    # Calculate means for each group
    mean_HCC = mean(Frequency[Group == "HCC"], na.rm = TRUE),
    mean_CLD = mean(Frequency[Group == "CLD"], na.rm = TRUE),
    mean_Healthy = mean(Frequency[Group == "Healthy"], na.rm = TRUE),
    # Calculate standard deviations
    sd_HCC = sd(Frequency[Group == "HCC"], na.rm = TRUE),
    sd_CLD = sd(Frequency[Group == "CLD"], na.rm = TRUE),
    sd_Healthy = sd(Frequency[Group == "Healthy"], na.rm = TRUE),
    # Variance of the motif over all samples, regardless of group
    variance_across_groups = var(Frequency, na.rm = TRUE),
    # Log2 fold changes of group means; 0.0001 is a pseudo-count that keeps
    # the ratio finite when a mean is zero
    fc_HCC_vs_Healthy = log2((mean_HCC + 0.0001) / (mean_Healthy + 0.0001)),
    fc_CLD_vs_Healthy = log2((mean_CLD + 0.0001) / (mean_Healthy + 0.0001)),
    fc_HCC_vs_CLD = log2((mean_HCC + 0.0001) / (mean_CLD + 0.0001)),
    # Maximum absolute fold change
    max_abs_fc = max(abs(c(fc_HCC_vs_Healthy, fc_CLD_vs_Healthy, fc_HCC_vs_CLD))),
    # CLD vs HCC specific fold change (absolute)
    abs_fc_CLD_HCC = abs(fc_HCC_vs_CLD),
    # ANOVA on this motif's rows; the grouped vectors are used directly
    # instead of re-filtering the full long table once per motif
    anova_p = motif_anova_p(Frequency, Group),
    # t-test for CLD vs HCC
    ttest_CLD_HCC_p = motif_ttest_p(Frequency[Group == "CLD"],
                                    Frequency[Group == "HCC"]),
    # Group with the highest mean; ties resolve in the order HCC, CLD, Healthy
    highest_group = case_when(
      mean_HCC == max(c(mean_HCC, mean_CLD, mean_Healthy), na.rm = TRUE) ~ "HCC",
      mean_CLD == max(c(mean_HCC, mean_CLD, mean_Healthy), na.rm = TRUE) ~ "CLD",
      mean_Healthy == max(c(mean_HCC, mean_CLD, mean_Healthy), na.rm = TRUE) ~ "Healthy",
      TRUE ~ "Equal"
    )
  ) %>%
  ungroup()
# Apply FDR correction (Benjamini-Hochberg) across motifs
motif_statistics$anova_p_adj <- p.adjust(motif_statistics$anova_p, method = "BH")
motif_statistics$ttest_CLD_HCC_p_adj <- p.adjust(motif_statistics$ttest_CLD_HCC_p, method = "BH")
# Create scoring system
# NOTE(review): the 0.0001 added inside -log10() caps the significance term
# at 4, so every motif with an adjusted p-value far below 1e-4 gets
# practically the same weight and the ranking is then driven by fold change
# alone. Left as is because changing it would change the selected motifs;
# confirm this saturation is intended.
motif_statistics <- motif_statistics %>%
  mutate(
    # Combined score considering fold change and statistical significance
    combined_score = max_abs_fc * -log10(anova_p_adj + 0.0001),
    # CLD vs HCC specific score
    CLD_HCC_score = abs_fc_CLD_HCC * -log10(ttest_CLD_HCC_p_adj + 0.0001)
  )
# Top 50 by combined score (overall significance)
# Keep motifs with a usable adjusted ANOVA p-value and take the 50 with the
# highest combined score (sorted in descending order of that score).
top_50_combined <- motif_statistics %>%
  filter(!is.na(anova_p_adj)) %>%
  arrange(desc(combined_score)) %>%
  slice_head(n = 50)
print("Top 50 most significant motifs selected")
print(paste("Mean fold change (HCC vs Healthy):", round(mean(top_50_combined$fc_HCC_vs_Healthy), 3)))
print(paste("Mean fold change (CLD vs Healthy):", round(mean(top_50_combined$fc_CLD_vs_Healthy), 3)))
# The recorded output also reports the HCC vs CLD mean; its print call was
# missing, so it is restored here
print(paste("Mean fold change (HCC vs CLD):", round(mean(top_50_combined$fc_HCC_vs_CLD), 3)))
## [1] "Top 50 most significant motifs selected"
## [1] "Mean fold change (HCC vs Healthy): -0.047"
## [1] "Mean fold change (CLD vs Healthy): -0.094"
## [1] "Mean fold change (HCC vs CLD): 0.047"
# ========== PREPARE DATA FOR HEATMAPS ==========
# Samples x motifs table: motif columns only, rows named by sample ID
heatmap_data <- my_data[, motif_cols, drop = FALSE]
rownames(heatmap_data) <- my_data$Sample_ID
# Display order of the clinical groups
group_order <- c("Healthy", "CLD", "HCC")
# Per-sample annotation (rows named by sample ID) with Group as an ordered-
# level factor following group_order
annotation_row <- data.frame(
  Group = factor(my_data$Group, levels = group_order),
  row.names = my_data$Sample_ID
)
# ========== CLUSTERING FUNCTION ==========
# Order samples group by group (in `group_order`), clustering the samples
# inside each group by correlation distance with Ward linkage.
#   annotation_data  data frame with a `Group` column; row names = sample IDs
#   zscore_matrix    samples x features matrix; row names = sample IDs
#   group_order      character vector of group labels, in display order
# Returns a character vector of sample IDs (NULL when nothing matches).
# Groups with one or two matching samples keep their original order; groups
# absent from `annotation_data` contribute nothing.
order_samples_by_group_enhanced <- function(annotation_data, zscore_matrix, group_order) {
  order_one_group <- function(grp) {
    members <- rownames(annotation_data)[which(annotation_data$Group == grp)]
    # Make sure samples exist in the zscore matrix
    members <- intersect(members, rownames(zscore_matrix))
    if (length(members) <= 2) {
      return(members)
    }
    group_matrix <- zscore_matrix[members, , drop = FALSE]
    # Correlation distance between samples. Pairwise-complete observations
    # skip NaN features (e.g. z-scores of a constant column); the zero-SD
    # warning is silenced because the resulting NAs are handled right below.
    sample_cor <- suppressWarnings(
      cor(t(group_matrix), use = "pairwise.complete.obs")
    )
    # A sample with no variance has undefined correlations, which would make
    # hclust() fail; treat it as uncorrelated (distance 1) instead
    sample_cor[!is.finite(sample_cor)] <- 0
    hc <- hclust(as.dist(1 - sample_cor), method = "ward.D2")
    members[hc$order]
  }
  unlist(lapply(group_order, order_one_group), use.names = FALSE)
}
# Names of the 50 selected motifs, restricted to columns present in the data
top_50_motif_names <- as.character(top_50_combined[["Motif"]])
available_motifs <- intersect(top_50_motif_names, colnames(heatmap_data))
print(paste("Found", length(available_motifs), "of", length(top_50_motif_names), "top motifs in data"))
## [1] "Found 50 of 50 top motifs in data"
# Subset to those motifs and standardise each motif column (z-score)
heatmap_data_top50 <- heatmap_data[, available_motifs, drop = FALSE]
heatmap_data_zscore_50 <- scale(heatmap_data_top50, center = TRUE, scale = TRUE)
# Sample order: grouped, then clustered within each group
ordered_samples_50 <- order_samples_by_group_enhanced(
  annotation_data = annotation_row,
  zscore_matrix = heatmap_data_zscore_50,
  group_order = group_order
)
# Cluster the motifs: Euclidean distance between motif columns, complete
# linkage; the dendrogram's leaf order becomes the motif display order
motif_matrix <- heatmap_data_zscore_50[ordered_samples_50, ]
motif_dist <- dist(t(motif_matrix), method = "euclidean")
motif_hclust <- hclust(motif_dist, method = "complete")
motif_dendro <- as.dendrogram(motif_hclust)
motif_order_clustered <- labels(motif_dendro)
# Perform hierarchical clustering within each group for samples.
# Groups are visited in `group_order`; inside each group the samples are
# ordered by complete-linkage clustering on Euclidean distance.
#   data_matrix  samples x features matrix; row names = sample IDs
#   group_info   data frame with a `Group` column; row names = sample IDs
#   group_order  group labels in display order (defaults to the three
#                clinical cohorts, matching the previous hard-coded order)
# Returns a character vector of sample IDs. Groups with one or two matching
# samples keep their original order.
cluster_samples_within_group <- function(data_matrix, group_info,
                                         group_order = c("Healthy", "CLD", "HCC")) {
  cluster_one_group <- function(grp) {
    members <- rownames(group_info)[which(group_info$Group == grp)]
    members <- intersect(members, rownames(data_matrix))
    if (length(members) <= 2) {
      return(members)
    }
    # drop = FALSE keeps a matrix even when there is a single feature column
    group_data <- data_matrix[members, , drop = FALSE]
    sample_hclust <- hclust(dist(group_data, method = "euclidean"),
                            method = "complete")
    members[sample_hclust$order]
  }
  unlist(lapply(group_order, cluster_one_group), use.names = FALSE)
}
# Sample display order: clustered inside each clinical group
samples_clustered <- cluster_samples_within_group(
  data_matrix = heatmap_data_zscore_50,
  group_info = annotation_row
)
# Long table for ggplot: one row per sample x motif with its z-score and the
# sample's group; the factor levels carry the clustered display orders
heatmap_long_clustered <- heatmap_data_zscore_50 %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var = "Sample_ID") %>%
  pivot_longer(cols = -Sample_ID, names_to = "Motif", values_to = "Z_score") %>%
  left_join(select(my_data, Sample_ID, Group), by = "Sample_ID") %>%
  mutate(
    Sample_ID = factor(Sample_ID, levels = samples_clustered),
    Motif = factor(Motif, levels = motif_order_clustered),
    Group = factor(Group, levels = c("Healthy", "CLD", "HCC"))
  )
# Create enhanced clustered heatmap: samples on x (one facet per group),
# motifs on y, tiles coloured by z-score
p_heatmap_clustered <- ggplot(heatmap_long_clustered,
                              aes(x = Sample_ID, y = Motif, fill = Z_score)) +
  geom_tile(color = NA) + # Remove gridlines for cleaner look
  scale_fill_gradient2(
    low = "darkblue",
    mid = "white",
    high = "darkred",
    midpoint = 0,
    limits = c(-3, 3),
    # z-scores beyond +/-3 are drawn in the end colours instead of as NA
    oob = scales::squish,
    name = "Z-score",
    breaks = c(-3, -2, -1, 0, 1, 2, 3)
  ) +
  # Free x scale/space: each facet shows only its own samples, with panel
  # width proportional to the number of samples
  facet_grid(. ~ Group, scales = "free_x", space = "free_x") +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    axis.text.y = element_text(size = 12, hjust = 1),
    axis.title.x = element_text(size = 18, face = "bold", margin = margin(t = 10)),
    axis.title.y = element_text(size = 18, face = "bold", margin = margin(r = 10)),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 11, hjust = 0.5, face = "italic"),
    strip.text = element_text(size = 14, face = "bold", color = "white"),
    strip.background = element_rect(fill = "gray30", color = "gray30"),
    legend.position = "right",
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 14),
    legend.key.height = unit(1.2, "cm"),
    legend.key.width = unit(0.6, "cm"),
    panel.spacing.x = unit(0.3, "lines"),
    panel.grid = element_blank(),
    panel.background = element_rect(fill = "white"),
    plot.background = element_rect(fill = "white")
  ) +
  labs(
    title = "",
    subtitle = "",
    x = "Samples(grouped and clustered)",
    y = "5'End Motifs"
  )
print(p_heatmap_clustered)
ggsave("ggplot_heatmap_top50_clustered.png", p_heatmap_clustered,
       width = 16, height = 12, dpi = 500)
library(ggplot2)
library(dplyr)
library(tidytext)
# Adjusted CLD-vs-HCC t-test p-values (q) per motif, keyed by motif name
sig_df <- motif_statistics %>%
  mutate(Motif = as.character(Motif)) %>%
  select(Motif, q = ttest_CLD_HCC_p_adj)
# From the top-50 set, keep up to 10 motifs per direction with the largest
# absolute HCC-vs-CLD log2 fold change, then attach q and a star label
top10 <- top_50_combined %>%
  mutate(Motif = as.character(Motif)) %>%
  filter(!is.na(fc_HCC_vs_CLD)) %>%
  mutate(Direction = ifelse(fc_HCC_vs_CLD > 0, "Up in HCC", "Up in CLD")) %>%
  group_by(Direction) %>%
  slice_max(order_by = abs(fc_HCC_vs_CLD), n = 10, with_ties = FALSE) %>%
  ungroup() %>%
  left_join(sig_df, by = "Motif") %>%
  mutate(
    # Missing or >= 0.05 is "ns"; otherwise more stars for smaller q
    star = case_when(
      is.na(q) | q >= 0.05 ~ "ns",
      q < 1e-4 ~ "****",
      q < 1e-3 ~ "***",
      q < 1e-2 ~ "**",
      TRUE ~ "*"
    ),
    star_col = ifelse(star == "ns", "grey40", "black")
  )
# x-axis order: ascending fold change
top10 <- top10 %>%
  arrange(fc_HCC_vs_CLD) %>%
  mutate(Motif = factor(Motif, levels = Motif))
# Gap between a bar's end and its star label (6 % of the largest |fold change|)
pad <- 0.06 * max(abs(top10$fc_HCC_vs_CLD), na.rm = TRUE)
# Bar chart of log2 fold changes with significance labels beyond each bar end
p <- ggplot(top10, aes(x = Motif, y = fc_HCC_vs_CLD, fill = Direction)) +
  geom_col(width = 0.7, color = "grey20") +
  geom_hline(yintercept = 0, linewidth = 1.1, color = "black") +
  geom_text(
    aes(
      label = star,
      # above positive bars, below the others
      y = ifelse(fc_HCC_vs_CLD > 0, fc_HCC_vs_CLD + pad, fc_HCC_vs_CLD - pad),
      color = star_col
    ),
    size = 6, fontface = "bold", show.legend = FALSE
  ) +
  # star_col already holds literal colour names
  scale_color_identity() +
  scale_fill_manual(values = c("Up in HCC" = "#E74C3C", "Up in CLD" = "#F39C12")) +
  labs(
    title = "",
    subtitle = "",
    x = "5' End motif",
    y = "Log2 fold change"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(color = "grey85"),
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12),
    plot.margin = margin(10, 20, 10, 10)
  )
print(p)
# Select top 10 motifs for trajectory visualization
# Ten motifs with the highest combined score
top_10_trajectory <- top_50_combined %>%
  arrange(desc(combined_score)) %>%
  slice_head(n = 10)
# Long table of the per-group means: one row per motif x group, with Group
# ordered Healthy -> CLD -> HCC for the x axis
trajectory_data <- top_10_trajectory %>%
  select(Motif, mean_Healthy, mean_CLD, mean_HCC) %>%
  pivot_longer(cols = starts_with("mean_"),
               names_to = "Group",
               values_to = "Mean_Expression") %>%
  mutate(
    Group = gsub("mean_", "", Group),
    Group = factor(Group, levels = c("Healthy", "CLD", "HCC"))
  )
# One line per motif across the three groups; y is log2 of the group mean
# with the same 0.0001 pseudo-count used for the fold changes
p_trajectory <- ggplot(trajectory_data,
                       aes(x = Group, y = log2(Mean_Expression + 0.0001),
                           group = Motif, color = Motif)) +
  # `linewidth` replaces the deprecated `size` for line geoms and matches
  # the other line layers in this script
  geom_line(linewidth = 1.2, alpha = 0.8) +
  geom_point(size = 3) +
  scale_color_viridis_d(option = "turbo") +
  labs(
    title = "Expression Trajectories: Top 10 Motifs",
    subtitle = "Disease progression from Healthy to HCC",
    x = "Disease State",
    y = "Log2(Mean Expression)",
    color = "Motif"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 11, hjust = 0.5),
    legend.position = "right",
    legend.text = element_text(size = 8)
  )
print(p_trajectory)
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
library(ggpubr)
library(ggsignif)
# ---- Six motifs with the highest combined score ----
top_6_motifs <- slice_head(arrange(top_50_combined, desc(combined_score)), n = 6)
print("Top 6 motifs selected for violin plots:")
## [1] "Top 6 motifs selected for violin plots:"
## # A tibble: 6 × 4
## Motif highest_group fc_HCC_vs_CLD anova_p_adj
## <chr> <chr> <dbl> <dbl>
## 1 GCTT Healthy -0.0855 1.70e-254
## 2 CTGG HCC 0.233 7.90e- 82
## 3 CTCG HCC 0.206 5.06e- 73
## 4 CCGG HCC 0.236 4.39e- 52
## 5 GCTG Healthy -0.0158 1.29e-195
## 6 GCTC Healthy -0.0291 2.98e-196
# ---- Per-sample rows of those motifs, tagged with each motif's top group ----
violin_data <- data_melted %>%
  inner_join(select(top_6_motifs, Motif, highest_group), by = "Motif") %>%
  mutate(
    Group = factor(Group, levels = c("Healthy", "CLD", "HCC")),
    # Frequency multiplied by 100; plotted on the "Frequency (%)" axis
    Percentage = 100 * Frequency
  )
# Fill colour per group
group_colors <- c("Healthy" = "#27AE60", "CLD" = "#F39C12", "HCC" = "#E74C3C")
# Group pairs to annotate with significance brackets
comparisons <- list(
  c("Healthy", "CLD"),
  c("CLD", "HCC"),
  c("Healthy", "HCC")
)
# ---------- Plot function ----------
# Violin + narrow boxplot + jitter of one motif's Percentage by Group, with
# pairwise t-test significance brackets.
#   motif_name   motif to plot (value of the Motif column)
#   data_subset  long table with Motif, Group, Percentage, highest_group
# Reads `group_colors` and `comparisons` from the calling environment.
# Returns a ggplot object.
create_violin_plot <- function(motif_name, data_subset) {
  motif_rows <- dplyr::filter(data_subset, Motif == motif_name)
  top_group <- dplyr::first(motif_rows$highest_group)
  # y axis runs from 0 to 135 % of the largest value, leaving room for brackets
  upper_limit <- 1.35 * max(motif_rows$Percentage, na.rm = TRUE)

  distribution_layers <- list(
    geom_violin(trim = FALSE, width = 0.75, alpha = 0.9, color = "white"),
    geom_boxplot(width = 0.16, fill = "white", alpha = 0.9,
                 outlier.shape = NA, linewidth = 0.4),
    geom_jitter(width = 0.08, alpha = 0.35, size = 0.9)
  )

  # Pairwise significance stars ("ns" is shown as well)
  significance_layer <- stat_compare_means(
    comparisons = comparisons,
    method = "t.test",
    label = "p.signif",
    hide.ns = FALSE,
    bracket.size = 0.6,
    step.increase = 0.15,
    size = 7,
    color = "black"
  )

  panel_theme <- theme(
    legend.position = "none",
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 11, hjust = 0.5),
    axis.title.x = element_text(size = 13, face = "bold", margin = margin(t = 6)),
    axis.title.y = element_text(size = 13, face = "bold", margin = margin(r = 6)),
    axis.text.x = element_text(size = 12, face = "bold"),
    axis.text.y = element_text(size = 12),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(10, 20, 10, 10)
  )

  ggplot(motif_rows, aes(x = Group, y = Percentage, fill = Group)) +
    distribution_layers +
    scale_fill_manual(values = group_colors) +
    scale_y_continuous(limits = c(0, upper_limit), expand = c(0.02, 0)) +
    significance_layer +
    labs(
      title = paste0("Motif: ", motif_name),
      subtitle = paste0("Highest in: ", top_group),
      x = "Group",
      y = "Frequency (%)"
    ) +
    # Drawing is not clipped to the panel area
    coord_cartesian(clip = "off") +
    theme_minimal(base_size = 12) +
    panel_theme
}
# ---------- One violin plot per selected motif ----------
plot_list <- lapply(
  top_6_motifs$Motif,
  function(motif) create_violin_plot(motif, data_subset = violin_data)
)
# Draw the plots on a 2-row x 3-column grid under an empty title grob
combined_violin_plots <- grid.arrange(
  grobs = plot_list,
  nrow = 2,
  ncol = 3,
  top = grid::textGrob(
    "",
    gp = grid::gpar(fontsize = 16, fontface = "bold")
  )
)
## TableGrob (3 x 3) "arrange": 7 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (2-2,3-3) arrange gtable[layout]
## 4 4 (3-3,1-1) arrange gtable[layout]
## 5 5 (3-3,2-2) arrange gtable[layout]
## 6 6 (3-3,3-3) arrange gtable[layout]
## 7 7 (1-1,1-3) arrange text[GRID.text.1533]
# One-row overview of the top-50 set: size, mean log2 fold changes (rounded to
# 3 decimals), counts of motifs beyond +/-0.5 log2 fold change between HCC and
# CLD, and the number with adjusted CLD-vs-HCC t-test p < 0.05
summary_top50 <- summarise(
  top_50_combined,
  Total_Motifs = n(),
  Mean_FC_HCC_vs_CLD = round(mean(fc_HCC_vs_CLD), digits = 3),
  Mean_FC_HCC_vs_Healthy = round(mean(fc_HCC_vs_Healthy), digits = 3),
  Mean_FC_CLD_vs_Healthy = round(mean(fc_CLD_vs_Healthy), digits = 3),
  Motifs_Higher_in_HCC = sum(fc_HCC_vs_CLD > 0.5),
  Motifs_Higher_in_CLD = sum(fc_HCC_vs_CLD < -0.5),
  Significant_CLD_HCC = sum(ttest_CLD_HCC_p_adj < 0.05, na.rm = TRUE)
)
# Export the full top-50 table and its overview
write.csv(top_50_combined, file = "top50_significant_motifs.csv", row.names = FALSE)
write.csv(summary_top50, file = "top50_summary_statistics.csv", row.names = FALSE)
# Final statistics
cat("\n================================================================================\n")##
## ================================================================================
## TOP 50 MOTIFS SUMMARY
## ================================================================================
## # A tibble: 1 × 7
## Total_Motifs Mean_FC_HCC_vs_CLD Mean_FC_HCC_vs_Healthy Mean_FC_CLD_vs_Healthy
## <int> <dbl> <dbl> <dbl>
## 1 50 0.047 -0.047 -0.094
## # ℹ 3 more variables: Motifs_Higher_in_HCC <int>, Motifs_Higher_in_CLD <int>,
## # Significant_CLD_HCC <int>
##
## Top 5 motifs with highest HCC vs CLD fold change:
# Five motifs with the largest (most positive) HCC-vs-CLD log2 fold change
top_50_combined %>%
  arrange(desc(fc_HCC_vs_CLD)) %>%
  select(Motif, fc_HCC_vs_CLD, ttest_CLD_HCC_p_adj) %>%
  head(5) %>%
  print()
## # A tibble: 5 × 3
## Motif fc_HCC_vs_CLD ttest_CLD_HCC_p_adj
## <chr> <dbl> <dbl>
## 1 CCGG 0.236 1.27e- 5
## 2 CTGG 0.233 9.52e- 8
## 3 CTCG 0.206 3.49e- 6
## 4 TTGG 0.205 7.45e- 7
## 5 ATGG 0.192 2.99e-11
##
## Top 5 motifs with highest CLD vs HCC fold change:
# Five motifs with the smallest (most negative) HCC-vs-CLD log2 fold change,
# i.e. the ones most elevated in CLD relative to HCC
top_50_combined %>%
  arrange(fc_HCC_vs_CLD) %>%
  select(Motif, fc_HCC_vs_CLD, ttest_CLD_HCC_p_adj) %>%
  head(5) %>%
  print()
## # A tibble: 5 × 3
## Motif fc_HCC_vs_CLD ttest_CLD_HCC_p_adj
## <chr> <dbl> <dbl>
## 1 ACTT -0.130 0.000000265
## 2 TCTA -0.121 0.000000265
## 3 TATG -0.101 0.0000127
## 4 GCAT -0.0928 0.00000193
## 5 TCTT -0.0886 0.00000168
## ================================================================================