Load data

## [1] 245955000

PreciseTAD

Use optimal harmonic = 1100 (found in script 17)

Arm1

## # A tibble: 6 × 4
## # Groups:   left_TAD, center_TAD [6]
##   left_TAD center_TAD right_TAD  Freq
##      <int>      <int>     <int> <int>
## 1      181        345       512     1
## 2      483        497       512     1
## 3      512        600       692     1
## 4      512        617       732     1
## 5      512        622       732     1
## 6      512        641       779     1
## # A tibble: 767 × 4
## # Groups:   left_TAD, center_TAD [766]
##    left_TAD center_TAD right_TAD  Freq
##       <int>      <int>     <int> <int>
##  1      181        345       512     1
##  2      483        497       512     1
##  3      512        600       692     1
##  4      512        617       732     1
##  5      512        622       732     1
##  6      512        641       779     1
##  7      512        673       835     1
##  8      512        799      1103     1
##  9      512        816      1103     1
## 10      512        855      1182     1
## # ℹ 757 more rows

Arm 2

## # A tibble: 6 × 4
## # Groups:   left_TAD, center_TAD [6]
##   left_TAD center_TAD right_TAD  Freq
##      <int>      <int>     <int> <int>
## 1       46       1401      2872     1
## 2       46       1420      2872     1
## 3       46       1459      2872     1
## 4       46       1507      2872     1
## 5      349       1554      2872     1
## 6      349       1577      2872     1
## # A tibble: 767 × 4
## # Groups:   left_TAD, center_TAD [767]
##    left_TAD center_TAD right_TAD  Freq
##       <int>      <int>     <int> <int>
##  1       46       1401      2872     1
##  2       46       1420      2872     1
##  3       46       1459      2872     1
##  4       46       1507      2872     1
##  5      349       1554      2872     1
##  6      349       1577      2872     1
##  7      349       1609      2872     1
##  8      349       1630      2872     1
##  9      689       1731      2872     1
## 10      689       1764      2872     1
## # ℹ 757 more rows

Difference of IS based on precise TAD

Arm 1

left_TAD right_TAD center_TAD averageProb_diff error_symetric overlap actual_size t.IS._left t.IS._right avg_t.IS.
900000 2555000 1720000 0.6347220 0.0182927 0 1655000 0.0765050 0.2374860 0.0765050
2410000 2555000 2480000 0.5139072 0.0714286 0 145000 0.8778770 0.2411200 0.8778770
2555000 3455000 2995000 0.8111084 0.0454545 0 900000 -0.2473022 0.2166828 -0.2473022
2555000 3655000 3080000 0.8177772 0.0952381 0 1100000 -0.2080633 -0.3796683 -0.2080633
2555000 3655000 3105000 0.8356176 0.0000000 0 1100000 -0.1939253 -0.3655303 -0.1939253
2555000 3890000 3200000 0.8205000 0.0697674 0 1335000 -0.1754102 0.5393428 -0.1754102

Arm 2

left_TAD right_TAD center_TAD averageProb_diff error_symetric overlap actual_size t.IS._left t.IS._right avg_t.IS.
140055000 157180000 149825000 0.5236988 0.0856089 10 17125000 NA NA NA
140055000 157180000 149920000 0.3829540 0.0567686 10 17125000 NA NA NA
140055000 157180000 150115000 0.4454908 0.0000000 10 17125000 NA NA NA
140055000 157180000 150355000 0.5811924 0.0703297 10 17125000 NA NA NA
143700000 157180000 150590000 0.3097900 0.0937759 10 13480000 NA NA NA
143700000 157180000 150705000 0.6360280 0.0545603 10 13480000 NA NA NA

Filter TAD

For TADs that share the same boundaries, keep the one with the smallest symmetry error

Function

Arm1

Report Measurement 1

left_TAD right_TAD center_TAD averageProb_diff error_symetric overlap actual_size t.IS._left t.IS._right avg_t.IS.
1 900000 2555000 1720000 0.6347220 0.0182927 0 1655000 0.0765050 0.2374860 0.0765050
2 2410000 2555000 2480000 0.5139072 0.0714286 0 145000 0.8778770 0.2411200 0.8778770
3 2555000 3455000 2995000 0.8111084 0.0454545 0 900000 -0.2473022 0.2166828 -0.2473022
4 2555000 3655000 3080000 0.8177772 0.0952381 0 1100000 -0.2080633 -0.3796683 -0.2080633
6 2555000 3890000 3200000 0.8205000 0.0697674 0 1335000 -0.1754102 0.5393428 -0.1754102
7 2555000 4170000 3360000 0.7471808 0.0062112 0 1615000 -0.4127700 0.0776680 -0.4127700

Report Figure 1

Arm2

Report Measurement 2

left_TAD right_TAD center_TAD averageProb_diff error_symetric overlap actual_size t.IS._left t.IS._right avg_t.IS.
9 145715000 157180000 151475000 0.4175512 0.0950096 10 11465000 -0.1312420 0.4827650 -0.1312420
14 147090000 157180000 152585000 0.1761212 0.0783460 10 10090000 -0.0410526 0.3227817 -0.0410526
15 147265000 157180000 152710000 0.4930088 0.0973154 10 9915000 0.3108627 0.3144157 0.3108627
16 147500000 157180000 152805000 0.4636148 0.0891429 10 9680000 0.0637285 0.2497180 0.0637285
17 147710000 157180000 152905000 0.3734940 0.0888889 10 9470000 0.1615609 0.4046250 0.1615609
21 149960000 157180000 153725000 0.2804360 0.0897250 10 7220000 -0.2658370 0.8375370 -0.2658370

Report Figure 2

Difference between IS and preciseTAD boundaries (bp)

## [1] 663
## [1] 1396

Find the closest IS boundary (treated as ground truth) for each preciseTAD boundary, then compute the preciseTAD error relative to it

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0   15000   45000   77852  115000  675000

## [1] 52

Scan for preciseTAD boundaries at each window size

Number of boundaries in each search window

## [[1]]
##   start_window end_window n_boundaries
## 1            0      49999            0
## 2        50000      99999            0
## 3       100000     149999            0
## 4       150000     199999            0
## 5       200000     249999            0
## 6       250000     299999            0
## 
## [[2]]
##   start_window end_window n_boundaries
## 1        0e+00      99999            0
## 2        1e+05     199999            0
## 3        2e+05     299999            0
## 4        3e+05     399999            0
## 5        4e+05     499999            0
## 6        5e+05     599999            0
## 
## [[3]]
##   start_window end_window n_boundaries
## 1            0     149999            0
## 2       150000     299999            0
## 3       300000     449999            0
## 4       450000     599999            0
## 5       600000     749999            0
## 6       750000     899999            0
## 
## [[4]]
##   start_window end_window n_boundaries
## 1        0e+00     199999            0
## 2        2e+05     399999            0
## 3        4e+05     599999            0
## 4        6e+05     799999            0
## 5        8e+05     999999            1
## 6        1e+06    1199999            1
## 
## [[5]]
##   start_window end_window n_boundaries
## 1            0     249999            0
## 2       250000     499999            0
## 3       500000     749999            0
## 4       750000     999999            1
## 5      1000000    1249999            1
## 6      1250000    1499999            0
## 
## [[6]]
##   start_window end_window n_boundaries
## 1            0     299999            0
## 2       300000     599999            0
## 3       600000     899999            0
## 4       900000    1199999            2
## 5      1200000    1499999            0
## 6      1500000    1799999            2
## 
## [[7]]
##   start_window end_window n_boundaries
## 1            0     349999            0
## 2       350000     699999            0
## 3       700000    1049999            2
## 4      1050000    1399999            0
## 5      1400000    1749999            1
## 6      1750000    2099999            3
## 
## [[8]]
##   start_window end_window n_boundaries
## 1            0     399999            0
## 2       400000     799999            0
## 3       800000    1199999            2
## 4      1200000    1599999            0
## 5      1600000    1999999            3
## 6      2000000    2399999            1
## 
## [[9]]
##   start_window end_window n_boundaries
## 1            0     449999            0
## 2       450000     899999            0
## 3       900000    1349999            2
## 4      1350000    1799999            2
## 5      1800000    2249999            2
## 6      2250000    2699999            2
## 
## [[10]]
##   start_window end_window n_boundaries
## 1            0     499999            0
## 2       500000     999999            1
## 3      1000000    1499999            1
## 4      1500000    1999999            3
## 5      2000000    2499999            2
## 6      2500000    2999999            1

Find regions that have the largest number of boundaries

## [[1]]
##    start_window end_window n_boundaries
## 19       900000     949999            1
## 21      1000000    1049999            1
## 33      1600000    1649999            1
## 36      1750000    1799999            1
## 39      1900000    1949999            1
## 42      2050000    2099999            1
## 
## [[2]]
##      start_window end_window n_boundaries
## 274      27300000   27399999            2
## 787      78600000   78699999            2
## 1567    156600000  156699999            2
## 1574    157300000  157399999            2
## 1863    186200000  186299999            2
## 1934    193300000  193399999            2
## 
## [[3]]
##     start_window end_window n_boundaries
## 7         900000    1049999            2
## 46       6750000    6899999            2
## 52       7650000    7799999            2
## 60       8850000    8999999            2
## 64       9450000    9599999            2
## 101     15000000   15149999            2
## 
## [[4]]
##      start_window end_window n_boundaries
## 932     186200000  186399999            3
## 967     193200000  193399999            3
## 1029    205600000  205799999            3
## 1040    207800000  207999999            3
## 
## [[5]]
##     start_window end_window n_boundaries
## 164     40750000   40999999            3
## 244     60750000   60999999            3
## 465    116000000  116249999            3
## 471    117500000  117749999            3
## 591    147500000  147749999            3
## 620    154750000  154999999            3
## 
## [[6]]
##     start_window end_window n_boundaries
## 686    205500000  205799999            4
## 782    234300000  234599999            4
## 
## [[7]]
##     start_window end_window n_boundaries
## 57      19600000   19949999            4
## 79      27300000   27649999            4
## 450    157150000  157499999            4
## 462    161350000  161699999            4
## 533    186200000  186549999            4
## 575    200900000  201249999            4
## 
## [[8]]
##     start_window end_window n_boundaries
## 50      19600000   19999999            4
## 66      26000000   26399999            4
## 102     40400000   40799999            4
## 291    116000000  116399999            4
## 387    154400000  154799999            4
## 388    154800000  155199999            4
## 
## [[9]]
##     start_window end_window n_boundaries
## 350    157050000  157499999            5
## 457    205200000  205649999            5
## 
## [[10]]
##     start_window end_window n_boundaries
## 82      40500000   40999999            5
## 233    116000000  116499999            5
## 310    154500000  154999999            5
## 315    157000000  157499999            5
## 387    193000000  193499999            5
## 412    205500000  205999999            5

From the results above, three broad regions contain the most boundaries:

- region1 = (19600000 : 40100000)
- region2 = (116000000 : 161200000)
- region3 = (193000000 : 235000000)

## region1 covers the integer positions 19600000 to 40100000 (inclusive);
## region1_bound keeps the entries of arm.bound that fall inside it.
region1 <- 19600000:40100000
region1_bound <- arm.bound[arm.bound %in% region1]
paste0("Number of boundaries in region1 = ", length(region1_bound))
## [1] "Number of boundaries in region1 = 106"
## region2 covers the integer positions 116000000 to 161200000 (inclusive).
region2 <- 116000000:161200000
region2_bound <- arm.bound[arm.bound %in% region2]
paste0("Number of boundaries in region2 = ", length(region2_bound))
## [1] "Number of boundaries in region2 = 100"
## region3 covers the integer positions 193000000 to 235000000 (inclusive).
region3 <- 193000000:235000000
region3_bound <- arm.bound[arm.bound %in% region3]
paste0("Number of boundaries in region3 = ", length(region3_bound))
## [1] "Number of boundaries in region3 = 199"

Boundaries bp at each searching window

Association between probability vector and insulation score curve

## Correlation between probbaility and insulation score in region 1:
## 
##  Pearson's product-moment correlation
## 
## data:  region1_prob$baseProbs and region1_is$Insulation_score
## t = -2.5001, df = 4099, p-value = 0.01246
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.069543786 -0.008422099
## sample estimates:
##         cor 
## -0.03901944
## Correlation between probbaility and insulation score in region 2:
## 
##  Pearson's product-moment correlation
## 
## data:  data.cor.2$baseProbs and data.cor.2$Insulation_score
## t = 0.91572, df = 3455, p-value = 0.3599
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.01776914  0.04888862
## sample estimates:
##        cor 
## 0.01557705
## Correlation between probbaility and insulation score in region 3:
## 
##  Pearson's product-moment correlation
## 
## data:  data.cor.3$baseProbs and data.cor.3$Insulation_score
## t = -6.8081, df = 8269, p-value = 1.058e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.09605670 -0.05319354
## sample estimates:
##        cor 
## -0.0746596

Distance between regions 1,2,3 boundaries and closest IS boundaries

Region 1

### Region 1
# For each preciseTAD boundary in region1_bound, find the nearest value of
# boundaryIS_ch1$Start and report the absolute distance between the two.

# vapply() guarantees a numeric vector even when region1_bound is empty
# (sapply() would return an empty list and break the data frame below).
closest_IS.bound <- vapply(
  region1_bound,
  function(x) {
    distances <- abs(boundaryIS_ch1$Start - x)
    # which.min() picks the first minimum (ties resolve to the earliest row)
    # and ignores NA distances, so one missing IS start does not turn every
    # result into NA.
    nearest <- which.min(distances)
    if (length(nearest) == 0L) {
      return(NA_real_) # no usable IS boundary available
    }
    as.numeric(boundaryIS_ch1$Start[nearest])
  },
  numeric(1)
)

# One row per preciseTAD boundary: the boundary, its nearest IS boundary,
# and the absolute distance between them.
report.e_bp <- data.frame(
  preciseTAD_boundary = region1_bound,
  closest_IS.bound = closest_IS.bound
)
report.e_bp$error_bp <- abs(
  report.e_bp$preciseTAD_boundary - report.e_bp$closest_IS.bound
)

cat("Summary of distance from preciseTAD region1 and closest IS boundary:\n")
## Summary of distance from preciseTAD region1 and closest IS boundary:
summary(report.e_bp$error_bp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0   20000   65000   82972  125000  555000
plot(
  density(report.e_bp$error_bp),
  main = "Density of Error bp from preciseTAD and closest IS boundary"
)

# A distance of exactly 0 means the preciseTAD boundary coincides with an IS
# boundary start.
n_overlapped <- sum(report.e_bp$error_bp == 0, na.rm = TRUE)
print(paste0("Number of overlapped TAD is ", n_overlapped))
## [1] "Number of overlapped TAD is 3"

Region 2

### Region 2
# For each preciseTAD boundary in region2_bound, find the nearest value of
# boundaryIS_ch1$Start and report the absolute distance between the two.

# vapply() guarantees a numeric vector even when region2_bound is empty
# (sapply() would return an empty list and break the data frame below).
closest_IS.bound <- vapply(
  region2_bound,
  function(x) {
    distances <- abs(boundaryIS_ch1$Start - x)
    # which.min() picks the first minimum (ties resolve to the earliest row)
    # and ignores NA distances, so one missing IS start does not turn every
    # result into NA.
    nearest <- which.min(distances)
    if (length(nearest) == 0L) {
      return(NA_real_) # no usable IS boundary available
    }
    as.numeric(boundaryIS_ch1$Start[nearest])
  },
  numeric(1)
)

# One row per preciseTAD boundary: the boundary, its nearest IS boundary,
# and the absolute distance between them.
report.e_bp <- data.frame(
  preciseTAD_boundary = region2_bound,
  closest_IS.bound = closest_IS.bound
)
report.e_bp$error_bp <- abs(
  report.e_bp$preciseTAD_boundary - report.e_bp$closest_IS.bound
)

cat("Summary of distance from preciseTAD region2 and closest IS boundary:\n")
## Summary of distance from preciseTAD region2 and closest IS boundary:
summary(report.e_bp$error_bp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0   15000   42500   67300  101250  320000
plot(
  density(report.e_bp$error_bp),
  main = "Density of Error bp from preciseTAD and closest IS boundary"
)

# A distance of exactly 0 means the preciseTAD boundary coincides with an IS
# boundary start.
n_overlapped <- sum(report.e_bp$error_bp == 0, na.rm = TRUE)
print(paste0("Number of overlapped TAD is ", n_overlapped))
## [1] "Number of overlapped TAD is 6"

Region 3

### Region 3
# For each preciseTAD boundary in region3_bound, find the nearest value of
# boundaryIS_ch1$Start and report the absolute distance between the two.

# vapply() guarantees a numeric vector even when region3_bound is empty
# (sapply() would return an empty list and break the data frame below).
closest_IS.bound <- vapply(
  region3_bound,
  function(x) {
    distances <- abs(boundaryIS_ch1$Start - x)
    # which.min() picks the first minimum (ties resolve to the earliest row)
    # and ignores NA distances, so one missing IS start does not turn every
    # result into NA.
    nearest <- which.min(distances)
    if (length(nearest) == 0L) {
      return(NA_real_) # no usable IS boundary available
    }
    as.numeric(boundaryIS_ch1$Start[nearest])
  },
  numeric(1)
)

# One row per preciseTAD boundary: the boundary, its nearest IS boundary,
# and the absolute distance between them.
report.e_bp <- data.frame(
  preciseTAD_boundary = region3_bound,
  closest_IS.bound = closest_IS.bound
)
report.e_bp$error_bp <- abs(
  report.e_bp$preciseTAD_boundary - report.e_bp$closest_IS.bound
)

cat("Summary of distance from preciseTAD region 3 and closest IS boundary:\n")
## Summary of distance from preciseTAD region 3 and closest IS boundary:
summary(report.e_bp$error_bp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0   17500   55000   74900  105000  500000
plot(
  density(report.e_bp$error_bp),
  main = "Density of Error bp from preciseTAD and closest IS boundary"
)

# A distance of exactly 0 means the preciseTAD boundary coincides with an IS
# boundary start.
n_overlapped <- sum(report.e_bp$error_bp == 0, na.rm = TRUE)
print(paste0("Number of overlapped TAD is ", n_overlapped))
## [1] "Number of overlapped TAD is 13"