2 Narrow HNF4A peaks

3 Comparing paired probes considering family classification

3.1 Get HNF4A probes target regions

## GRanges object with 455 ranges and 19 metadata columns:
##         seqnames                 ranges strand |          GeneID
##            <Rle>              <IRanges>  <Rle> |     <character>
##     [1]     chr6   [45979349, 45979350]      * | ENSG00000001561
##     [2]    chr17   [37034884, 37034885]      * | ENSG00000002834
##     [3]     chr7   [95546508, 95546509]      * | ENSG00000004799
##     [4]     chr7   [95546539, 95546540]      * | ENSG00000004799
##     [5]     chr7   [95957175, 95957176]      * | ENSG00000004799
##     ...      ...                    ...    ... .             ...
##   [451]     chr2 [ 62797901,  62797902]      * | ENSG00000266097
##   [452]     chr2 [ 62797865,  62797866]      * | ENSG00000266097
##   [453]     chr2 [ 62797926,  62797927]      * | ENSG00000266097
##   [454]    chr10 [127897162, 127897163]      * | ENSG00000277371
##   [455]    chr10 [126211892, 126211893]      * | ENSG00000278831
##               Probe      Symbol       Sides        Raw.p          FDR
##         <character> <character> <character>    <numeric>    <numeric>
##     [1]  cg16474725       ENPP4          R6 1.270122e-21 1.939775e-20
##     [2]  cg27061889       LASP1          R1 7.105825e-20 7.827895e-19
##     [3]  cg09136052        PDK4          L2 8.477625e-16 4.870098e-15
##     [4]  cg18450582        PDK4          L2 8.477625e-16 4.870098e-15
##     [5]  cg10694598        PDK4          L5 8.477625e-16 4.870098e-15
##     ...         ...         ...         ...          ...          ...
##   [451]  cg08573315     MIR5192         L10 8.319520e-08 1.670574e-07
##   [452]  cg27227250     MIR5192         L10 8.319520e-08 1.670574e-07
##   [453]  cg04036593     MIR5192         L10 8.319520e-08 1.670574e-07
##   [454]  cg17287034 Metazoa_SRP          L7 1.373459e-06 2.310831e-06
##   [455]  cg16426764  AL513190.1          R3 9.809833e-06 1.445882e-05
##         distNearestTSS DMC_analysis_pvalue DMC_analysis_escc_Minus_esad
##              <integer>           <numeric>                    <numeric>
##     [1]         118379        2.576139e-23                    0.3499644
##     [2]           4741        1.656185e-36                    0.3364163
##     [3]         320704        1.948822e-21                    0.3318075
##     [4]         320735        6.976460e-20                    0.3926457
##     [5]         731371        8.877982e-24                    0.3241807
##     ...            ...                 ...                          ...
##   [451]         364940        5.470409e-55                    0.3887874
##   [452]         364904        1.437656e-57                    0.4449732
##   [453]         364965        2.720400e-58                    0.4903376
##   [454]         339298        1.362491e-20                    0.3099911
##   [455]         100028        6.455562e-32                    0.3729131
##         DMC_analysis_adjust.p external_gene_name original_ensembl_gene_id
##                     <numeric>        <character>              <character>
##     [1]          3.119777e-21              ENPP4        ENSG00000001561.6
##     [2]          1.227075e-33              LASP1       ENSG00000002834.16
##     [3]          1.817628e-19               PDK4        ENSG00000004799.7
##     [4]          5.281645e-18               PDK4        ENSG00000004799.7
##     [5]          1.152399e-21               PDK4        ENSG00000004799.7
##     ...                   ...                ...                      ...
##   [451]          6.558573e-51            MIR5192        ENSG00000266097.1
##   [452]          2.708565e-53            MIR5192        ENSG00000266097.1
##   [453]          7.175381e-54            MIR5192        ENSG00000266097.1
##   [454]          1.145422e-18        Metazoa_SRP        ENSG00000277371.1
##   [455]          2.425544e-29         AL513190.1        ENSG00000278831.1
##                                                                                                                                                                      TF
##                                                                                                                                                             <character>
##     [1]                                          HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;SPDEF;ELF3;NR1I2;PPARG;RORC;THRA;NR1H3;NR1H4;ZSCAN16;ZNF816;IRF8
##     [2]                                                                           HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;SPDEF;ELF3;NR3C2;ZSCAN16;ZNF816
##     [3]                                                  HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;NR1I2;PPARG;RORC;THRA;NR1H3;NR1H4;NFATC2;SPDEF;ELF3;IRF8
##     [4]                                                  HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;NR1I2;PPARG;RORC;THRA;NR1H3;NR1H4;NFATC2;IRF8;SPDEF;ELF3
##     [5]                                              HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;NR1I2;PPARG;RORC;THRA;NR1H3;NR1H4;NFATC2;ZSCAN16;ZNF816;SOX9
##     ...                                                                                                                                                             ...
##   [451]      HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;ZNF620;ZSCAN16;ZNF774;ZNF468;ZNF33A;ZNF816;ZNF619;ZNF765;ZNF799;ZNF763;ZNF823;ZNF124;SPDEF;ELF3;SOX9
##   [452]      HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;ZNF620;ZSCAN16;ZNF774;ZNF468;ZNF33A;ZNF816;ZNF619;ZNF765;ZNF799;ZNF763;ZNF823;ZNF124;SPDEF;ELF3;SOX9
##   [453] HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;IRF8;ZNF620;ZSCAN16;ZNF774;ZNF468;ZNF33A;ZNF816;ZNF619;ZNF765;ZNF799;ZNF763;ZNF823;ZNF124;SPDEF;ELF3;SOX9
##   [454]       FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;HNF4A;HNF4G;NR2F2;NR2C1;SPDEF;ELF3;GATA6;GATA4;HNF1A;HNF1B;PDX1;CDX2;HOXB6;HOXB5;MNX1;EVX1;CDX1;HOXA13;HOXB9;ZSCAN16;ZNF816
##   [455]                                                   HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;NR1I2;PPARG;RORC;THRA;NR1H3;NR1H4;SPDEF;ELF3;SOX9;NR3C2
##         probe_hg19_width gene_hg19_seqnames gene_hg19_start gene_hg19_end
##                <integer>        <character>       <integer>     <integer>
##     [1]                2               chr6        46097730      46114436
##     [2]                2              chr17        37026112      37078023
##     [3]                2               chr7        95212811      95225803
##     [4]                2               chr7        95212811      95225803
##     [5]                2               chr7        95212811      95225803
##     ...              ...                ...             ...           ...
##   [451]                2               chr2        62432961      62433052
##   [452]                2               chr2        62432961      62433052
##   [453]                2               chr2        62432961      62433052
##   [454]                2              chr10       127557864     127558145
##   [455]                2              chr10       126311922     126312648
##         gene_hg19_width gene_hg19_strand
##               <integer>      <character>
##     [1]           16707                +
##     [2]           51912                +
##     [3]           12993                -
##     [4]           12993                -
##     [5]           12993                -
##     ...             ...              ...
##   [451]              92                +
##   [452]              92                +
##   [453]              92                +
##   [454]             282                +
##   [455]             727                +
##   -------
##   seqinfo: 23 sequences from an unspecified genome; no seqlengths

3.2 Distance between probes and peaks

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

3.3 Results overlap

3.3.1 Overlap between probes (+-250bp) and peaks

## GRanges object with 455 ranges and 2 metadata columns:
##         seqnames                 ranges strand |          GeneID
##            <Rle>              <IRanges>  <Rle> |     <character>
##     [1]     chr6   [45979099, 45979599]      * | ENSG00000001561
##     [2]    chr17   [37034634, 37035134]      * | ENSG00000002834
##     [3]     chr7   [95546258, 95546758]      * | ENSG00000004799
##     [4]     chr7   [95546289, 95546789]      * | ENSG00000004799
##     [5]     chr7   [95956925, 95957425]      * | ENSG00000004799
##     ...      ...                    ...    ... .             ...
##   [451]     chr2 [ 62797651,  62798151]      * | ENSG00000266097
##   [452]     chr2 [ 62797615,  62798115]      * | ENSG00000266097
##   [453]     chr2 [ 62797676,  62798176]      * | ENSG00000266097
##   [454]    chr10 [127896912, 127897412]      * | ENSG00000277371
##   [455]    chr10 [126211642, 126212142]      * | ENSG00000278831
##               Probe
##         <character>
##     [1]  cg16474725
##     [2]  cg27061889
##     [3]  cg09136052
##     [4]  cg18450582
##     [5]  cg10694598
##     ...         ...
##   [451]  cg08573315
##   [452]  cg27227250
##   [453]  cg04036593
##   [454]  cg17287034
##   [455]  cg16426764
##   -------
##   seqinfo: 23 sequences from an unspecified genome; no seqlengths
## [1] 501
## [1] 39

3.3.3 Regions overlapped

3.3.4 Enrichment between peaks and all distal probes

##                      Overlap No overlap
## Unique Linked probes      39        416
## Distal probes           1063     162513
## 
##  Fisher's Exact Test for Count Data
## 
## data:  x
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   9.988211 20.051123
## sample estimates:
## odds ratio 
##   14.33474

3.3.6 Enrichment between peaks and distal probes without signature for family members

##                                              Overlap No overlap
## Unique Linked probes                              39        416
## Distal probes without family motif signature     517      97614
## 
##  Fisher's Exact Test for Count Data
## 
## data:  x
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  12.26892 24.92618
## sample estimates:
## odds ratio 
##   17.69461

4 Comparing paired probes considering subfamily classification

## GRanges object with 203 ranges and 19 metadata columns:
##         seqnames                 ranges strand |          GeneID
##            <Rle>              <IRanges>  <Rle> |     <character>
##     [1]     chr6   [45979349, 45979350]      * | ENSG00000001561
##     [2]    chr17   [37034884, 37034885]      * | ENSG00000002834
##     [3]     chr7   [95546508, 95546509]      * | ENSG00000004799
##     [4]     chr7   [95546539, 95546540]      * | ENSG00000004799
##     [5]     chr7   [95957175, 95957176]      * | ENSG00000004799
##     ...      ...                    ...    ... .             ...
##   [199]     chr7 [ 27173480,  27173481]      * | ENSG00000254369
##   [200]     chr2 [ 62797901,  62797902]      * | ENSG00000266097
##   [201]     chr2 [ 62797865,  62797866]      * | ENSG00000266097
##   [202]     chr2 [ 62797926,  62797927]      * | ENSG00000266097
##   [203]    chr10 [126211892, 126211893]      * | ENSG00000278831
##               Probe      Symbol       Sides        Raw.p          FDR
##         <character> <character> <character>    <numeric>    <numeric>
##     [1]  cg16474725       ENPP4          R6 1.270122e-21 1.939775e-20
##     [2]  cg27061889       LASP1          R1 7.105825e-20 7.827895e-19
##     [3]  cg09136052        PDK4          L2 8.477625e-16 4.870098e-15
##     [4]  cg18450582        PDK4          L2 8.477625e-16 4.870098e-15
##     [5]  cg10694598        PDK4          L5 8.477625e-16 4.870098e-15
##     ...         ...         ...         ...          ...          ...
##   [199]  cg17285448    HOXA-AS3          R2 3.201199e-06 5.089675e-06
##   [200]  cg08573315     MIR5192         L10 8.319520e-08 1.670574e-07
##   [201]  cg27227250     MIR5192         L10 8.319520e-08 1.670574e-07
##   [202]  cg04036593     MIR5192         L10 8.319520e-08 1.670574e-07
##   [203]  cg16426764  AL513190.1          R3 9.809833e-06 1.445882e-05
##         distNearestTSS DMC_analysis_pvalue DMC_analysis_escc_Minus_esad
##              <integer>           <numeric>                    <numeric>
##     [1]         118379        2.576139e-23                    0.3499644
##     [2]           4741        1.656185e-36                    0.3364163
##     [3]         320704        1.948822e-21                    0.3318075
##     [4]         320735        6.976460e-20                    0.3926457
##     [5]         731371        8.877982e-24                    0.3241807
##     ...            ...                 ...                          ...
##   [199]           3884        5.267260e-26                    0.3772209
##   [200]         364940        5.470409e-55                    0.3887874
##   [201]         364904        1.437656e-57                    0.4449732
##   [202]         364965        2.720400e-58                    0.4903376
##   [203]         100028        6.455562e-32                    0.3729131
##         DMC_analysis_adjust.p external_gene_name original_ensembl_gene_id
##                     <numeric>        <character>              <character>
##     [1]          3.119777e-21              ENPP4        ENSG00000001561.6
##     [2]          1.227075e-33              LASP1       ENSG00000002834.16
##     [3]          1.817628e-19               PDK4        ENSG00000004799.7
##     [4]          5.281645e-18               PDK4        ENSG00000004799.7
##     [5]          1.152399e-21               PDK4        ENSG00000004799.7
##     ...                   ...                ...                      ...
##   [199]          9.286785e-24           HOXA-AS3        ENSG00000254369.5
##   [200]          6.558573e-51            MIR5192        ENSG00000266097.1
##   [201]          2.708565e-53            MIR5192        ENSG00000266097.1
##   [202]          7.175381e-54            MIR5192        ENSG00000266097.1
##   [203]          2.425544e-29         AL513190.1        ENSG00000278831.1
##                                                                       TF
##                                                              <character>
##     [1]                 HNF4A;HNF4G;ELF3;PPARG;FOXD2;NR1H3;NR1H4;ZSCAN16
##     [2]                 HNF4A;HNF4G;FOXA3;FOXA2;FOXD2;ELF3;NR3C2;ZSCAN16
##     [3]                  HNF4A;HNF4G;FOXA3;FOXA2;FOXD2;NR1H3;NR1H4;NR2F2
##     [4]                  HNF4A;HNF4G;FOXA3;FOXA2;FOXD2;NR1H3;NR1H4;NR2F2
##     [5]                HNF4A;HNF4G;NR2F2;PPARG;FOXD2;NR1H3;NR1H4;ZSCAN16
##     ...                                                              ...
##   [199]           HNF4A;HNF4G;NR2F2;PPARG;NR1H3;NR1H4;TCF7L2;NR1I2;NR3C2
##   [200]                                          HNF4A;HNF4G;NR2F2;FOXD2
##   [201]                                          HNF4A;HNF4G;NR2F2;FOXD2
##   [202]                                          HNF4A;HNF4G;NR2F2;FOXD2
##   [203] HNF4A;HNF4G;FOXA3;FOXA2;FOXD2;PPARG;ELF3;NR1H3;NR1H4;NR2F2;NR3C2
##         probe_hg19_width gene_hg19_seqnames gene_hg19_start gene_hg19_end
##                <integer>        <character>       <integer>     <integer>
##     [1]                2               chr6        46097730      46114436
##     [2]                2              chr17        37026112      37078023
##     [3]                2               chr7        95212811      95225803
##     [4]                2               chr7        95212811      95225803
##     [5]                2               chr7        95212811      95225803
##     ...              ...                ...             ...           ...
##   [199]                2               chr7        27169596      27195547
##   [200]                2               chr2        62432961      62433052
##   [201]                2               chr2        62432961      62433052
##   [202]                2               chr2        62432961      62433052
##   [203]                2              chr10       126311922     126312648
##         gene_hg19_width gene_hg19_strand
##               <integer>      <character>
##     [1]           16707                +
##     [2]           51912                +
##     [3]           12993                -
##     [4]           12993                -
##     [5]           12993                -
##     ...             ...              ...
##   [199]           25952                +
##   [200]              92                +
##   [201]              92                +
##   [202]              92                +
##   [203]             727                +
##   -------
##   seqinfo: 22 sequences from an unspecified genome; no seqlengths
## GRanges object with 203 ranges and 2 metadata columns:
##         seqnames                 ranges strand |          GeneID
##            <Rle>              <IRanges>  <Rle> |     <character>
##     [1]     chr6   [45979099, 45979599]      * | ENSG00000001561
##     [2]    chr17   [37034634, 37035134]      * | ENSG00000002834
##     [3]     chr7   [95546258, 95546758]      * | ENSG00000004799
##     [4]     chr7   [95546289, 95546789]      * | ENSG00000004799
##     [5]     chr7   [95956925, 95957425]      * | ENSG00000004799
##     ...      ...                    ...    ... .             ...
##   [199]     chr7 [ 27173230,  27173730]      * | ENSG00000254369
##   [200]     chr2 [ 62797651,  62798151]      * | ENSG00000266097
##   [201]     chr2 [ 62797615,  62798115]      * | ENSG00000266097
##   [202]     chr2 [ 62797676,  62798176]      * | ENSG00000266097
##   [203]    chr10 [126211642, 126212142]      * | ENSG00000278831
##               Probe
##         <character>
##     [1]  cg16474725
##     [2]  cg27061889
##     [3]  cg09136052
##     [4]  cg18450582
##     [5]  cg10694598
##     ...         ...
##   [199]  cg17285448
##   [200]  cg08573315
##   [201]  cg27227250
##   [202]  cg04036593
##   [203]  cg16426764
##   -------
##   seqinfo: 22 sequences from an unspecified genome; no seqlengths
## [1] 501
## [1] 34

4.1 Enrichment between peaks and distal probes

##                      Overlap No overlap
## Unique Linked probes      34        169
## Distal probes           1063     162513
## 
##  Fisher's Exact Test for Count Data
## 
## data:  x
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  20.51252 44.95357
## sample estimates:
## odds ratio 
##   30.75854

5 Enrichment plot

# Key the extended probe ranges by probe ID so they can be selected by name,
# then pull them out in the same order as the rows of `targets`.
names(probes_extended_500bp) <- probes_extended_500bp$Probe
p <- probes_extended_500bp[targets$Probe]

# Build the signal matrix of `peaks.gr` (column "peak") around each selected
# probe range, using extend = 5000 and 50 bp windows.
# NOTE(review): "w0" is one of EnrichedHeatmap's window-averaging modes --
# confirm it is the intended one for this peak signal.
mat1 <- normalizeToMatrix(
  signal = peaks.gr,
  target = p,
  extend = 5000,
  w = 50,
  value_column = "peak",
  mean_mode = "w0"
)


# The seed is set immediately before the heatmap is built.
# NOTE(review): presumably only matters if a randomised option (e.g. the
# disabled k-means split below) is switched back on -- confirm.
set.seed(1234)

# Enrichment heatmap of the signal matrix `mat1`, white -> red colour scale.
# Options that were tried and are currently disabled:
#   km = 2                  (k-means row clustering)
#   row_title_rot = 0
#   cluster_rows = TRUE
#   top_annotation = HeatmapAnnotation(
#     enriched = anno_enriched(gp = gpar(col = 2:4, lty = 1:3)))
#   split = sample(c("A", "B"), 455, replace = TRUE)
chip.heatmap <- EnrichedHeatmap(
  mat = mat1,
  col = c("white", "red"),
  name = "HNF4A TF"
)


# Gene abundance table for sample Eso26siHNF4AB1 (column types are guessed
# by readr; `col_types = readr::cols()` only silences the guess message).
exp <- read_tsv(
  "/hdd/ESCA/QD320C/QD320C_results_hg38_gencodev28/stringtieFPKM/Eso26siHNF4AB1-SA06722_S7_L002_1Aligned.sortedByCoord.out.gene_abund.txt",
  col_types = readr::cols()
)

# Drop everything from the first "." onwards (the version suffix) so the IDs
# are comparable with `targets$GeneID`. A single vectorised `sub()` replaces
# the former per-element sapply()/str_split() loop and yields a plain,
# unnamed character vector.
exp$`Gene ID` <- sub("\\..*$", "", exp$`Gene ID`)

# Re-order the table to one row per entry of `targets`, in the same order;
# target genes absent from the table become all-NA rows.
exp <- exp[match(targets$GeneID, exp$`Gene ID`), ]

# One-column expression track, log2(TPM + 1), white -> orange.
exp.heatmap <- Heatmap(
  log2(exp$TPM + 1),
  col = c("white", "orange"),
  name = "Targets log2(TPM + 1) - Eso26siHNF4AB1",
  show_row_names = FALSE,
  width = unit(5, "mm")
)

# Gene abundance table for sample Eso26siHNF4AB2 (column types are guessed
# by readr; `col_types = readr::cols()` only silences the guess message).
exp2 <- read_tsv(
  "/hdd/ESCA/QD320C/QD320C_results_hg38_gencodev28/stringtieFPKM/Eso26siHNF4AB2-SA06723_S8_L002_1Aligned.sortedByCoord.out.gene_abund.txt",
  col_types = readr::cols()
)

# Drop everything from the first "." onwards (the version suffix) so the IDs
# are comparable with `targets$GeneID`.
exp2$`Gene ID` <- sub("\\..*$", "", exp2$`Gene ID`)

# Re-order the table to one row per entry of `targets`, in the same order.
# The lookup must be done against this table's own `Gene ID` column: row
# positions computed from any other table do not refer to the same genes here.
# Target genes absent from the table become all-NA rows.
exp2 <- exp2[match(targets$GeneID, exp2$`Gene ID`), ]

# One-column expression track, log2(TPM + 1), white -> orange.
exp.heatmap2 <- Heatmap(
  log2(exp2$TPM + 1),
  col = c("white", "orange"),
  name = "Targets log2(TPM + 1) - Eso26siHNF4AB2",
  show_row_names = FALSE,
  width = unit(5, "mm")
)


# Colour scale for the methylation difference: the smallest observed value
# maps to white and the largest to red.
meth_col_fun <- circlize::colorRamp2(
  breaks = range(p$DMC_analysis_escc_Minus_esad),
  colors = c("white", "red")
)

# One-column track of the per-probe `DMC_analysis_escc_Minus_esad` values.
dna.met.heatmap <- Heatmap(
  p$DMC_analysis_escc_Minus_esad,
  col = meth_col_fun,
  name = "Diff DNA met (escc - esad)",
  width = unit(5, "mm")
)

# One-column track of -log10 of each probe's `FDR` value, default colours.
pair.pvalue.heatmap <- Heatmap(
  -log10(p$FDR),
  width = unit(5, "mm"),
  name = "-log10(Pair FDR)"
)

# Concatenate all tracks into a single heatmap list; as a top-level
# expression this is printed (drawn) when the chunk runs.
chip.heatmap +
  exp.heatmap +
  exp.heatmap2 +
  dna.met.heatmap +
  pair.pvalue.heatmap