## [1] 245955000
Use the optimal harmonic = 1100 (found in script 17)
## # A tibble: 6 × 4
## # Groups: left_TAD, center_TAD [6]
## left_TAD center_TAD right_TAD Freq
## <int> <int> <int> <int>
## 1 181 345 512 1
## 2 483 497 512 1
## 3 512 600 692 1
## 4 512 617 732 1
## 5 512 622 732 1
## 6 512 641 779 1
## # A tibble: 767 × 4
## # Groups: left_TAD, center_TAD [766]
## left_TAD center_TAD right_TAD Freq
## <int> <int> <int> <int>
## 1 181 345 512 1
## 2 483 497 512 1
## 3 512 600 692 1
## 4 512 617 732 1
## 5 512 622 732 1
## 6 512 641 779 1
## 7 512 673 835 1
## 8 512 799 1103 1
## 9 512 816 1103 1
## 10 512 855 1182 1
## # ℹ 757 more rows
## # A tibble: 6 × 4
## # Groups: left_TAD, center_TAD [6]
## left_TAD center_TAD right_TAD Freq
## <int> <int> <int> <int>
## 1 46 1401 2872 1
## 2 46 1420 2872 1
## 3 46 1459 2872 1
## 4 46 1507 2872 1
## 5 349 1554 2872 1
## 6 349 1577 2872 1
## # A tibble: 767 × 4
## # Groups: left_TAD, center_TAD [767]
## left_TAD center_TAD right_TAD Freq
## <int> <int> <int> <int>
## 1 46 1401 2872 1
## 2 46 1420 2872 1
## 3 46 1459 2872 1
## 4 46 1507 2872 1
## 5 349 1554 2872 1
## 6 349 1577 2872 1
## 7 349 1609 2872 1
## 8 349 1630 2872 1
## 9 689 1731 2872 1
## 10 689 1764 2872 1
## # ℹ 757 more rows
| left_TAD | right_TAD | center_TAD | averageProb_diff | error_symetric | overlap | actual_size | t.IS._left | t.IS._right | avg_t.IS. |
|---|---|---|---|---|---|---|---|---|---|
| 900000 | 2555000 | 1720000 | 0.6347220 | 0.0182927 | 0 | 1655000 | 0.0765050 | 0.2374860 | 0.0765050 |
| 2410000 | 2555000 | 2480000 | 0.5139072 | 0.0714286 | 0 | 145000 | 0.8778770 | 0.2411200 | 0.8778770 |
| 2555000 | 3455000 | 2995000 | 0.8111084 | 0.0454545 | 0 | 900000 | -0.2473022 | 0.2166828 | -0.2473022 |
| 2555000 | 3655000 | 3080000 | 0.8177772 | 0.0952381 | 0 | 1100000 | -0.2080633 | -0.3796683 | -0.2080633 |
| 2555000 | 3655000 | 3105000 | 0.8356176 | 0.0000000 | 0 | 1100000 | -0.1939253 | -0.3655303 | -0.1939253 |
| 2555000 | 3890000 | 3200000 | 0.8205000 | 0.0697674 | 0 | 1335000 | -0.1754102 | 0.5393428 | -0.1754102 |
| left_TAD | right_TAD | center_TAD | averageProb_diff | error_symetric | overlap | actual_size | t.IS._left | t.IS._right | avg_t.IS. |
|---|---|---|---|---|---|---|---|---|---|
| 140055000 | 157180000 | 149825000 | 0.5236988 | 0.0856089 | 10 | 17125000 | NA | NA | NA |
| 140055000 | 157180000 | 149920000 | 0.3829540 | 0.0567686 | 10 | 17125000 | NA | NA | NA |
| 140055000 | 157180000 | 150115000 | 0.4454908 | 0.0000000 | 10 | 17125000 | NA | NA | NA |
| 140055000 | 157180000 | 150355000 | 0.5811924 | 0.0703297 | 10 | 17125000 | NA | NA | NA |
| 143700000 | 157180000 | 150590000 | 0.3097900 | 0.0937759 | 10 | 13480000 | NA | NA | NA |
| 143700000 | 157180000 | 150705000 | 0.6360280 | 0.0545603 | 10 | 13480000 | NA | NA | NA |
For TADs that share the same boundaries, choose the one with the smallest symmetric error
Report Measurement 1
| left_TAD | right_TAD | center_TAD | averageProb_diff | error_symetric | overlap | actual_size | t.IS._left | t.IS._right | avg_t.IS. | |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 900000 | 2555000 | 1720000 | 0.6347220 | 0.0182927 | 0 | 1655000 | 0.0765050 | 0.2374860 | 0.0765050 |
| 2 | 2410000 | 2555000 | 2480000 | 0.5139072 | 0.0714286 | 0 | 145000 | 0.8778770 | 0.2411200 | 0.8778770 |
| 3 | 2555000 | 3455000 | 2995000 | 0.8111084 | 0.0454545 | 0 | 900000 | -0.2473022 | 0.2166828 | -0.2473022 |
| 4 | 2555000 | 3655000 | 3080000 | 0.8177772 | 0.0952381 | 0 | 1100000 | -0.2080633 | -0.3796683 | -0.2080633 |
| 6 | 2555000 | 3890000 | 3200000 | 0.8205000 | 0.0697674 | 0 | 1335000 | -0.1754102 | 0.5393428 | -0.1754102 |
| 7 | 2555000 | 4170000 | 3360000 | 0.7471808 | 0.0062112 | 0 | 1615000 | -0.4127700 | 0.0776680 | -0.4127700 |
Report Figure 1
Report Measurement 2
| left_TAD | right_TAD | center_TAD | averageProb_diff | error_symetric | overlap | actual_size | t.IS._left | t.IS._right | avg_t.IS. | |
|---|---|---|---|---|---|---|---|---|---|---|
| 9 | 145715000 | 157180000 | 151475000 | 0.4175512 | 0.0950096 | 10 | 11465000 | -0.1312420 | 0.4827650 | -0.1312420 |
| 14 | 147090000 | 157180000 | 152585000 | 0.1761212 | 0.0783460 | 10 | 10090000 | -0.0410526 | 0.3227817 | -0.0410526 |
| 15 | 147265000 | 157180000 | 152710000 | 0.4930088 | 0.0973154 | 10 | 9915000 | 0.3108627 | 0.3144157 | 0.3108627 |
| 16 | 147500000 | 157180000 | 152805000 | 0.4636148 | 0.0891429 | 10 | 9680000 | 0.0637285 | 0.2497180 | 0.0637285 |
| 17 | 147710000 | 157180000 | 152905000 | 0.3734940 | 0.0888889 | 10 | 9470000 | 0.1615609 | 0.4046250 | 0.1615609 |
| 21 | 149960000 | 157180000 | 153725000 | 0.2804360 | 0.0897250 | 10 | 7220000 | -0.2658370 | 0.8375370 | -0.2658370 |
Report Figure 2
## [1] 663
## [1] 1396
Find the closest IS boundary (treated as the truth) for each preciseTAD boundary, then compute the preciseTAD error as the distance between them
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 15000 45000 77852 115000 675000
## [1] 52
## [[1]]
## start_window end_window n_boundaries
## 1 0 49999 0
## 2 50000 99999 0
## 3 100000 149999 0
## 4 150000 199999 0
## 5 200000 249999 0
## 6 250000 299999 0
##
## [[2]]
## start_window end_window n_boundaries
## 1 0e+00 99999 0
## 2 1e+05 199999 0
## 3 2e+05 299999 0
## 4 3e+05 399999 0
## 5 4e+05 499999 0
## 6 5e+05 599999 0
##
## [[3]]
## start_window end_window n_boundaries
## 1 0 149999 0
## 2 150000 299999 0
## 3 300000 449999 0
## 4 450000 599999 0
## 5 600000 749999 0
## 6 750000 899999 0
##
## [[4]]
## start_window end_window n_boundaries
## 1 0e+00 199999 0
## 2 2e+05 399999 0
## 3 4e+05 599999 0
## 4 6e+05 799999 0
## 5 8e+05 999999 1
## 6 1e+06 1199999 1
##
## [[5]]
## start_window end_window n_boundaries
## 1 0 249999 0
## 2 250000 499999 0
## 3 500000 749999 0
## 4 750000 999999 1
## 5 1000000 1249999 1
## 6 1250000 1499999 0
##
## [[6]]
## start_window end_window n_boundaries
## 1 0 299999 0
## 2 300000 599999 0
## 3 600000 899999 0
## 4 900000 1199999 2
## 5 1200000 1499999 0
## 6 1500000 1799999 2
##
## [[7]]
## start_window end_window n_boundaries
## 1 0 349999 0
## 2 350000 699999 0
## 3 700000 1049999 2
## 4 1050000 1399999 0
## 5 1400000 1749999 1
## 6 1750000 2099999 3
##
## [[8]]
## start_window end_window n_boundaries
## 1 0 399999 0
## 2 400000 799999 0
## 3 800000 1199999 2
## 4 1200000 1599999 0
## 5 1600000 1999999 3
## 6 2000000 2399999 1
##
## [[9]]
## start_window end_window n_boundaries
## 1 0 449999 0
## 2 450000 899999 0
## 3 900000 1349999 2
## 4 1350000 1799999 2
## 5 1800000 2249999 2
## 6 2250000 2699999 2
##
## [[10]]
## start_window end_window n_boundaries
## 1 0 499999 0
## 2 500000 999999 1
## 3 1000000 1499999 1
## 4 1500000 1999999 3
## 5 2000000 2499999 2
## 6 2500000 2999999 1
## [[1]]
## start_window end_window n_boundaries
## 19 900000 949999 1
## 21 1000000 1049999 1
## 33 1600000 1649999 1
## 36 1750000 1799999 1
## 39 1900000 1949999 1
## 42 2050000 2099999 1
##
## [[2]]
## start_window end_window n_boundaries
## 274 27300000 27399999 2
## 787 78600000 78699999 2
## 1567 156600000 156699999 2
## 1574 157300000 157399999 2
## 1863 186200000 186299999 2
## 1934 193300000 193399999 2
##
## [[3]]
## start_window end_window n_boundaries
## 7 900000 1049999 2
## 46 6750000 6899999 2
## 52 7650000 7799999 2
## 60 8850000 8999999 2
## 64 9450000 9599999 2
## 101 15000000 15149999 2
##
## [[4]]
## start_window end_window n_boundaries
## 932 186200000 186399999 3
## 967 193200000 193399999 3
## 1029 205600000 205799999 3
## 1040 207800000 207999999 3
##
## [[5]]
## start_window end_window n_boundaries
## 164 40750000 40999999 3
## 244 60750000 60999999 3
## 465 116000000 116249999 3
## 471 117500000 117749999 3
## 591 147500000 147749999 3
## 620 154750000 154999999 3
##
## [[6]]
## start_window end_window n_boundaries
## 686 205500000 205799999 4
## 782 234300000 234599999 4
##
## [[7]]
## start_window end_window n_boundaries
## 57 19600000 19949999 4
## 79 27300000 27649999 4
## 450 157150000 157499999 4
## 462 161350000 161699999 4
## 533 186200000 186549999 4
## 575 200900000 201249999 4
##
## [[8]]
## start_window end_window n_boundaries
## 50 19600000 19999999 4
## 66 26000000 26399999 4
## 102 40400000 40799999 4
## 291 116000000 116399999 4
## 387 154400000 154799999 4
## 388 154800000 155199999 4
##
## [[9]]
## start_window end_window n_boundaries
## 350 157050000 157499999 5
## 457 205200000 205649999 5
##
## [[10]]
## start_window end_window n_boundaries
## 82 40500000 40999999 5
## 233 116000000 116499999 5
## 310 154500000 154999999 5
## 315 157000000 157499999 5
## 387 193000000 193499999 5
## 412 205500000 205999999 5
From the results above, three broad regions contain most of the boundaries: region1 = (19600000 : 40100000); region2 = (116000000 : 161200000); region3 = (193000000 : 235000000)
## region1 = (19600000 : 40100000 )
# Region 1 covers the inclusive coordinate interval 19600000..40100000.
region1_start <- 19600000
region1_end <- 40100000
# Kept as the full coordinate sequence in case later code still uses region1.
region1 <- region1_start:region1_end
# Select boundaries with a range test instead of `%in% region1`, which would
# match against a ~20.5-million-element table. which() drops NA comparisons,
# so NA entries in arm.bound are excluded exactly as %in% excluded them.
# NOTE(review): assumes arm.bound holds whole-number bp positions; a
# fractional value inside the interval would now be kept -- confirm.
region1_bound <- arm.bound[
  which(arm.bound >= region1_start & arm.bound <= region1_end)
]
paste0("Number of boundaries in region1 = ", length(region1_bound))
## [1] "Number of boundaries in region1 = 106"
## region2 = (116000000 : 161200000 )
# Region 2 covers the inclusive coordinate interval 116000000..161200000.
region2_start <- 116000000
region2_end <- 161200000
# Kept as the full coordinate sequence in case later code still uses region2.
region2 <- region2_start:region2_end
# Select boundaries with a range test instead of `%in% region2`, which would
# match against a ~45.2-million-element table. which() drops NA comparisons,
# so NA entries in arm.bound are excluded exactly as %in% excluded them.
# NOTE(review): assumes arm.bound holds whole-number bp positions; a
# fractional value inside the interval would now be kept -- confirm.
region2_bound <- arm.bound[
  which(arm.bound >= region2_start & arm.bound <= region2_end)
]
paste0("Number of boundaries in region2 = ", length(region2_bound))
## [1] "Number of boundaries in region2 = 100"
## region3 = (193000000 : 235000000 )
# Region 3 covers the inclusive coordinate interval 193000000..235000000.
region3_start <- 193000000
region3_end <- 235000000
# Kept as the full coordinate sequence in case later code still uses region3.
region3 <- region3_start:region3_end
# Select boundaries with a range test instead of `%in% region3`, which would
# match against a ~42-million-element table. which() drops NA comparisons,
# so NA entries in arm.bound are excluded exactly as %in% excluded them.
# NOTE(review): assumes arm.bound holds whole-number bp positions; a
# fractional value inside the interval would now be kept -- confirm.
region3_bound <- arm.bound[
  which(arm.bound >= region3_start & arm.bound <= region3_end)
]
paste0("Number of boundaries in region3 = ", length(region3_bound))
## [1] "Number of boundaries in region3 = 199"
Boundaries bp at each searching window
## Correlation between probbaility and insulation score in region 1:
##
## Pearson's product-moment correlation
##
## data: region1_prob$baseProbs and region1_is$Insulation_score
## t = -2.5001, df = 4099, p-value = 0.01246
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.069543786 -0.008422099
## sample estimates:
## cor
## -0.03901944
## Correlation between probbaility and insulation score in region 2:
##
## Pearson's product-moment correlation
##
## data: data.cor.2$baseProbs and data.cor.2$Insulation_score
## t = 0.91572, df = 3455, p-value = 0.3599
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.01776914 0.04888862
## sample estimates:
## cor
## 0.01557705
## Correlation between probbaility and insulation score in region 3:
##
## Pearson's product-moment correlation
##
## data: data.cor.3$baseProbs and data.cor.3$Insulation_score
## t = -6.8081, df = 8269, p-value = 1.058e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.09605670 -0.05319354
## sample estimates:
## cor
## -0.0746596
Region 1
### Region 1
# Region 1: for every preciseTAD boundary in region1_bound, find the nearest
# IS boundary start and report the absolute distance between the two.
# Read the IS boundary starts once rather than on every iteration.
is_starts <- boundaryIS_ch1$Start
# vapply() guarantees one number per boundary, so an empty region1_bound
# yields numeric(0) instead of the list() that sapply() would return.
closest_IS.bound <- vapply(
  region1_bound,
  # which.min() returns the first minimum, so ties go to the earliest entry.
  function(x) is_starts[which.min(abs(is_starts - x))],
  numeric(1)
)
# The second column is left unnamed so it takes the name closest_IS.bound.
report.e_bp <- data.frame(
  preciseTAD_boundary = region1_bound,
  closest_IS.bound
)
report.e_bp$error_bp <- abs(
  report.e_bp$preciseTAD_boundary - report.e_bp$closest_IS.bound
)
cat("Summary of distance from preciseTAD region1 and closest IS boundary:\n")
## Summary of distance from preciseTAD region1 and closest IS boundary:
summary(report.e_bp$error_bp)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 20000 65000 82972 125000 555000
plot(
  density(report.e_bp$error_bp),
  main = "Density of Error bp from preciseTAD and closest IS boundary"
)
# A distance of 0 means the preciseTAD boundary coincides with an IS boundary.
print(paste0(
  "Number of overlapped TAD is ",
  sum(report.e_bp$error_bp == 0, na.rm = TRUE)
))
## [1] "Number of overlapped TAD is 3"
Region 2
### Region 2
# Region 2: for every preciseTAD boundary in region2_bound, find the nearest
# IS boundary start and report the absolute distance between the two.
# Read the IS boundary starts once rather than on every iteration.
is_starts <- boundaryIS_ch1$Start
# vapply() guarantees one number per boundary, so an empty region2_bound
# yields numeric(0) instead of the list() that sapply() would return.
closest_IS.bound <- vapply(
  region2_bound,
  # which.min() returns the first minimum, so ties go to the earliest entry.
  function(x) is_starts[which.min(abs(is_starts - x))],
  numeric(1)
)
# The second column is left unnamed so it takes the name closest_IS.bound.
report.e_bp <- data.frame(
  preciseTAD_boundary = region2_bound,
  closest_IS.bound
)
report.e_bp$error_bp <- abs(
  report.e_bp$preciseTAD_boundary - report.e_bp$closest_IS.bound
)
cat("Summary of distance from preciseTAD region2 and closest IS boundary:\n")
## Summary of distance from preciseTAD region2 and closest IS boundary:
summary(report.e_bp$error_bp)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 15000 42500 67300 101250 320000
plot(
  density(report.e_bp$error_bp),
  main = "Density of Error bp from preciseTAD and closest IS boundary"
)
# A distance of 0 means the preciseTAD boundary coincides with an IS boundary.
print(paste0(
  "Number of overlapped TAD is ",
  sum(report.e_bp$error_bp == 0, na.rm = TRUE)
))
## [1] "Number of overlapped TAD is 6"
Region 3
### Region 3
# Region 3: for every preciseTAD boundary in region3_bound, find the nearest
# IS boundary start and report the absolute distance between the two.
# Read the IS boundary starts once rather than on every iteration.
is_starts <- boundaryIS_ch1$Start
# vapply() guarantees one number per boundary, so an empty region3_bound
# yields numeric(0) instead of the list() that sapply() would return.
closest_IS.bound <- vapply(
  region3_bound,
  # which.min() returns the first minimum, so ties go to the earliest entry.
  function(x) is_starts[which.min(abs(is_starts - x))],
  numeric(1)
)
# The second column is left unnamed so it takes the name closest_IS.bound.
report.e_bp <- data.frame(
  preciseTAD_boundary = region3_bound,
  closest_IS.bound
)
report.e_bp$error_bp <- abs(
  report.e_bp$preciseTAD_boundary - report.e_bp$closest_IS.bound
)
cat("Summary of distance from preciseTAD region 3 and closest IS boundary:\n")
## Summary of distance from preciseTAD region 3 and closest IS boundary:
summary(report.e_bp$error_bp)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 17500 55000 74900 105000 500000
plot(
  density(report.e_bp$error_bp),
  main = "Density of Error bp from preciseTAD and closest IS boundary"
)
# A distance of 0 means the preciseTAD boundary coincides with an IS boundary.
print(paste0(
  "Number of overlapped TAD is ",
  sum(report.e_bp$error_bp == 0, na.rm = TRUE)
))
## [1] "Number of overlapped TAD is 13"