# Restore the objects saved in the ELMER results file, fetch ELMER's
# "TF.family" table, and print (sorted) the entry for the HNF4A motif.
load("~/paper_elmer/ESCA_analysis_hg38/escc_vs_esad_hg38/hyper/ELMER_results_hyper.rda")
family <- ELMER:::getdata("TF.family")
sort(family$HNF4A_HUMAN.H11MO.0.A)
## [1] "HNF4A" "HNF4G" "NR2C1" "NR2C2" "NR2E1" "NR2E3" "NR2F1" "NR2F2"
## [9] "NR2F6" "RXRA" "RXRB" "RXRG"
## [1] "HNF4A" "HNF4G"
# Which motifs in `TF` have a top potential TF family that mentions a member
# of the HNF4A motif family? `grep()` does a regular-expression (partial)
# match of each member name against `TF$top.potential.TF.family`.
en.motif.family <- unlist(
  sapply(
    family$HNF4A_HUMAN.H11MO.0.A,
    function(member) {
      matched.rows <- grep(member, TF$top.potential.TF.family)
      TF$motif[matched.rows]
    }
  )
)
en.motif.family
## HNF4A1 HNF4A2 HNF4A3
## "HNF4A_HUMAN.H11MO.0.A" "HNF4G_HUMAN.H11MO.0.B" "NR2F6_HUMAN.H11MO.0.D"
## HNF4A4 HNF4A5 HNF4A6
## "RXRG_HUMAN.H11MO.0.B" "NR2E3_HUMAN.H11MO.0.C" "COT2_HUMAN.H11MO.1.A"
## HNF4A7
## "COT2_HUMAN.H11MO.0.A"
# Same lookup at the subfamily level: motifs in `TF` whose top potential TF
# subfamily mentions a member listed for the HNF4A motif in `subfamily`.
en.motif.subfamily <- unlist(
  sapply(
    subfamily$HNF4A_HUMAN.H11MO.0.A,
    function(member) {
      matched.rows <- grep(member, TF$top.potential.TF.subfamily)
      TF$motif[matched.rows]
    }
  )
)
en.motif.subfamily
## HNF4A1 HNF4A2
## "HNF4A_HUMAN.H11MO.0.A" "HNF4G_HUMAN.H11MO.0.B"
## [1] 455
## [1] 5
## [1] 203
## [1] 160
# Label the columns of the peak table (`peaks`, created before this point),
# add the "chr" prefix to chromosome names and convert it to a GRanges object.
colnames(peaks) <- c(
  "chrom", "Start", "End", "Name", "Width", "Score",
  "fold_enrichment", "-log10(pValue)", "-log10(qValue)", "peak"
)
peaks$chrom <- paste0("chr", peaks$chrom)
peaks.gr <- makeGRangesFromDataFrame(peaks, keep.extra.columns = TRUE)
DT::datatable(peaks, filter = "top", options = list(scrollX = TRUE))

# Family-level TF-target table with hg19-mapped coordinates; keep the rows
# whose TF column mentions HNF4A.
pairs <- read_csv(
  "~/paper_elmer/ESCA_analysis_hg38/escc_vs_esad_hg38/hyper/getTFtargets_genomic_coordinates_mapped_to_hg19.hyper.family.csv",
  col_types = readr::cols()
)
targets <- pairs[grep("HNF4A", pairs$TF), ]
DT::datatable(targets, filter = "top", options = list(scrollX = TRUE))

# Build probe ranges from the hg19 probe coordinate columns; all remaining
# columns are carried along as metadata. Duplicated ranges are dropped.
probes <- makeGRangesFromDataFrame(
  targets,
  seqnames.field = "probe_hg19_seqnames",
  start.field = "probe_hg19_start",
  end.field = "probe_hg19_end",
  strand.field = "probe_hg19_strand",
  keep.extra.columns = TRUE
)
probes <- unique(probes)
probes
## GRanges object with 455 ranges and 19 metadata columns:
## seqnames ranges strand | GeneID
## <Rle> <IRanges> <Rle> | <character>
## [1] chr6 [45979349, 45979350] * | ENSG00000001561
## [2] chr17 [37034884, 37034885] * | ENSG00000002834
## [3] chr7 [95546508, 95546509] * | ENSG00000004799
## [4] chr7 [95546539, 95546540] * | ENSG00000004799
## [5] chr7 [95957175, 95957176] * | ENSG00000004799
## ... ... ... ... . ...
## [451] chr2 [ 62797901, 62797902] * | ENSG00000266097
## [452] chr2 [ 62797865, 62797866] * | ENSG00000266097
## [453] chr2 [ 62797926, 62797927] * | ENSG00000266097
## [454] chr10 [127897162, 127897163] * | ENSG00000277371
## [455] chr10 [126211892, 126211893] * | ENSG00000278831
## Probe Symbol Sides Raw.p FDR
## <character> <character> <character> <numeric> <numeric>
## [1] cg16474725 ENPP4 R6 1.270122e-21 1.939775e-20
## [2] cg27061889 LASP1 R1 7.105825e-20 7.827895e-19
## [3] cg09136052 PDK4 L2 8.477625e-16 4.870098e-15
## [4] cg18450582 PDK4 L2 8.477625e-16 4.870098e-15
## [5] cg10694598 PDK4 L5 8.477625e-16 4.870098e-15
## ... ... ... ... ... ...
## [451] cg08573315 MIR5192 L10 8.319520e-08 1.670574e-07
## [452] cg27227250 MIR5192 L10 8.319520e-08 1.670574e-07
## [453] cg04036593 MIR5192 L10 8.319520e-08 1.670574e-07
## [454] cg17287034 Metazoa_SRP L7 1.373459e-06 2.310831e-06
## [455] cg16426764 AL513190.1 R3 9.809833e-06 1.445882e-05
## distNearestTSS DMC_analysis_pvalue DMC_analysis_escc_Minus_esad
## <integer> <numeric> <numeric>
## [1] 118379 2.576139e-23 0.3499644
## [2] 4741 1.656185e-36 0.3364163
## [3] 320704 1.948822e-21 0.3318075
## [4] 320735 6.976460e-20 0.3926457
## [5] 731371 8.877982e-24 0.3241807
## ... ... ... ...
## [451] 364940 5.470409e-55 0.3887874
## [452] 364904 1.437656e-57 0.4449732
## [453] 364965 2.720400e-58 0.4903376
## [454] 339298 1.362491e-20 0.3099911
## [455] 100028 6.455562e-32 0.3729131
## DMC_analysis_adjust.p external_gene_name original_ensembl_gene_id
## <numeric> <character> <character>
## [1] 3.119777e-21 ENPP4 ENSG00000001561.6
## [2] 1.227075e-33 LASP1 ENSG00000002834.16
## [3] 1.817628e-19 PDK4 ENSG00000004799.7
## [4] 5.281645e-18 PDK4 ENSG00000004799.7
## [5] 1.152399e-21 PDK4 ENSG00000004799.7
## ... ... ... ...
## [451] 6.558573e-51 MIR5192 ENSG00000266097.1
## [452] 2.708565e-53 MIR5192 ENSG00000266097.1
## [453] 7.175381e-54 MIR5192 ENSG00000266097.1
## [454] 1.145422e-18 Metazoa_SRP ENSG00000277371.1
## [455] 2.425544e-29 AL513190.1 ENSG00000278831.1
## TF
## <character>
## [1] HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;SPDEF;ELF3;NR1I2;PPARG;RORC;THRA;NR1H3;NR1H4;ZSCAN16;ZNF816;IRF8
## [2] HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;SPDEF;ELF3;NR3C2;ZSCAN16;ZNF816
## [3] HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;NR1I2;PPARG;RORC;THRA;NR1H3;NR1H4;NFATC2;SPDEF;ELF3;IRF8
## [4] HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;NR1I2;PPARG;RORC;THRA;NR1H3;NR1H4;NFATC2;IRF8;SPDEF;ELF3
## [5] HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;NR1I2;PPARG;RORC;THRA;NR1H3;NR1H4;NFATC2;ZSCAN16;ZNF816;SOX9
## ... ...
## [451] HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;ZNF620;ZSCAN16;ZNF774;ZNF468;ZNF33A;ZNF816;ZNF619;ZNF765;ZNF799;ZNF763;ZNF823;ZNF124;SPDEF;ELF3;SOX9
## [452] HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;ZNF620;ZSCAN16;ZNF774;ZNF468;ZNF33A;ZNF816;ZNF619;ZNF765;ZNF799;ZNF763;ZNF823;ZNF124;SPDEF;ELF3;SOX9
## [453] HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;IRF8;ZNF620;ZSCAN16;ZNF774;ZNF468;ZNF33A;ZNF816;ZNF619;ZNF765;ZNF799;ZNF763;ZNF823;ZNF124;SPDEF;ELF3;SOX9
## [454] FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;HNF4A;HNF4G;NR2F2;NR2C1;SPDEF;ELF3;GATA6;GATA4;HNF1A;HNF1B;PDX1;CDX2;HOXB6;HOXB5;MNX1;EVX1;CDX1;HOXA13;HOXB9;ZSCAN16;ZNF816
## [455] HNF4A;HNF4G;NR2F2;NR2C1;FOXA3;FOXA2;FOXP4;FOXD2;FOXJ1;NR1I2;PPARG;RORC;THRA;NR1H3;NR1H4;SPDEF;ELF3;SOX9;NR3C2
## probe_hg19_width gene_hg19_seqnames gene_hg19_start gene_hg19_end
## <integer> <character> <integer> <integer>
## [1] 2 chr6 46097730 46114436
## [2] 2 chr17 37026112 37078023
## [3] 2 chr7 95212811 95225803
## [4] 2 chr7 95212811 95225803
## [5] 2 chr7 95212811 95225803
## ... ... ... ... ...
## [451] 2 chr2 62432961 62433052
## [452] 2 chr2 62432961 62433052
## [453] 2 chr2 62432961 62433052
## [454] 2 chr10 127557864 127558145
## [455] 2 chr10 126311922 126312648
## gene_hg19_width gene_hg19_strand
## <integer> <character>
## [1] 16707 +
## [2] 51912 +
## [3] 12993 -
## [4] 12993 -
## [5] 12993 -
## ... ... ...
## [451] 92 +
## [452] 92 +
## [453] 92 +
## [454] 282 +
## [455] 727 +
## -------
## seqinfo: 23 sequences from an unspecified genome; no seqlengths
# Distance from every linked probe to its nearest peak (strand ignored),
# shown as a histogram.
y <- distanceToNearest(probes, peaks.gr, ignore.strand = TRUE)
qplot(mcols(y)$distance, geom = "histogram") +
  ggthemes::theme_gdocs()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Widen each unique probe range to a 501 bp window centred on the probe,
# then show the windows with only their first two metadata columns.
probes_extended_500bp <- resize(
  unique(probes),
  width = 501,
  fix = "center"
)
probes_extended_500bp[, seq_len(2)]
## GRanges object with 455 ranges and 2 metadata columns:
## seqnames ranges strand | GeneID
## <Rle> <IRanges> <Rle> | <character>
## [1] chr6 [45979099, 45979599] * | ENSG00000001561
## [2] chr17 [37034634, 37035134] * | ENSG00000002834
## [3] chr7 [95546258, 95546758] * | ENSG00000004799
## [4] chr7 [95546289, 95546789] * | ENSG00000004799
## [5] chr7 [95956925, 95957425] * | ENSG00000004799
## ... ... ... ... . ...
## [451] chr2 [ 62797651, 62798151] * | ENSG00000266097
## [452] chr2 [ 62797615, 62798115] * | ENSG00000266097
## [453] chr2 [ 62797676, 62798176] * | ENSG00000266097
## [454] chr10 [127896912, 127897412] * | ENSG00000277371
## [455] chr10 [126211642, 126212142] * | ENSG00000278831
## Probe
## <character>
## [1] cg16474725
## [2] cg27061889
## [3] cg09136052
## [4] cg18450582
## [5] cg10694598
## ... ...
## [451] cg08573315
## [452] cg27227250
## [453] cg04036593
## [454] cg17287034
## [455] cg16426764
## -------
## seqinfo: 23 sequences from an unspecified genome; no seqlengths
## [1] 501
# Overlap peaks (query) with the 501 bp probe windows (subject); the subject
# indices identify the windows touched by at least one peak.
hits <- suppressWarnings(
  findOverlaps(peaks.gr, probes_extended_500bp)
)
probes.hit <- subjectHits(hits)
# Number of distinct windows overlapped by a peak.
length(unique(probes_extended_500bp[probes.hit]))
## [1] 39
## Downloading transcripts information. Using: Homo sapiens genes (GRCh37.p13)
## Returning distal probes: 163576
# 2 x 2 contingency table of peak overlap: linked probes (rows 1) versus the
# distal-probe background (row 2). `distal.probes` and `distal.probes.hit`
# come from code earlier in the script.
x <- rbind(
  "Unique Linked probes" = c(
    "Overlap" = length(unique(probes[probes.hit])),
    "No overlap" = length(unique(probes)) - length(unique(probes[probes.hit]))
  ),
  "Distal probes" = c(
    "Overlap" = length(unique(distal.probes.hit)),
    "No overlap" = length(distal.probes) - length(unique(distal.probes.hit))
  )
)
x
## Overlap No overlap
## Unique Linked probes 39 416
## Distal probes 1063 162513
##
## Fisher's Exact Test for Count Data
##
## data: x
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 9.988211 20.051123
## sample estimates:
## odds ratio
## 14.33474
# Background restricted to distal probes that carry at least one of the
# enriched family motifs (`en.motif.family`).
motifs.probes <- ELMER:::getdata("Probes.motif.hg38.450K")
probes.family <- rownames(motifs.probes)[rowSums(as.matrix(motifs.probes[, en.motif.family])) > 0]

# Build the selection mask from the names of the object that is actually
# subset, as the subfamily-motif step does, so mask and ranges are guaranteed
# to be aligned element by element.
distal.probes.family.motif <- distal.probes.extended_500bp[
  names(distal.probes.extended_500bp) %in% probes.family
]
hits.distal.family.motif <- suppressWarnings(
  findOverlaps(peaks.gr, distal.probes.family.motif)
)
distal.probes.family.motif.hit <- subjectHits(hits.distal.family.motif)

# Contingency table: linked probes vs. distal probes with a family motif.
# Filled column-wise, then transposed so that each group is a row.
x <- matrix(
  c(
    # linked probes overlapping a peak
    length(unique(probes[probes.hit])),
    # linked probes not overlapping a peak
    length(unique(probes)) - length(unique(probes[probes.hit])),
    # background probes overlapping a peak
    length(unique(distal.probes.family.motif.hit)),
    # background probes not overlapping a peak
    length(distal.probes.family.motif) - length(unique(distal.probes.family.motif.hit))
  ),
  nrow = 2,
  dimnames = list(
    c("Overlap", "No overlap"),
    c("Unique Linked probes", "Distal probes with family motif signature")
  )
)
x <- t(x)
x
## Overlap No overlap
## Unique Linked probes 39 416
## Distal probes with family motif signature 546 64899
##
## Fisher's Exact Test for Count Data
##
## data: x
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 7.727622 15.686300
## sample estimates:
## odds ratio
## 11.14414
# Probes carrying at least one enriched family / subfamily motif.
motifs.probes <- ELMER:::getdata("Probes.motif.hg38.450K")
probes.family <- rownames(motifs.probes)[rowSums(as.matrix(motifs.probes[, en.motif.family])) > 0]
probes.subfamily <- rownames(motifs.probes)[rowSums(as.matrix(motifs.probes[, en.motif.subfamily])) > 0]

# Background restricted to distal probes WITHOUT any family motif. The mask
# is built from the names of the object being subset, as the subfamily-motif
# step does, so mask and ranges are guaranteed to be aligned.
distal.probes.family.nomotif <- distal.probes.extended_500bp[
  !names(distal.probes.extended_500bp) %in% probes.family
]
hits.distal.family.nomotif <- suppressWarnings(
  findOverlaps(peaks.gr, distal.probes.family.nomotif)
)
distal.probes.family.nomotif.hit <- subjectHits(hits.distal.family.nomotif)

# Contingency table: linked probes vs. distal probes without a family motif.
# Filled column-wise, then transposed so that each group is a row.
x <- matrix(
  c(
    # linked probes overlapping a peak
    length(unique(probes[probes.hit])),
    # linked probes not overlapping a peak
    length(unique(probes)) - length(unique(probes[probes.hit])),
    # background probes overlapping a peak
    length(unique(distal.probes.family.nomotif.hit)),
    # background probes not overlapping a peak
    length(distal.probes.family.nomotif) - length(unique(distal.probes.family.nomotif.hit))
  ),
  nrow = 2,
  dimnames = list(
    c("Overlap", "No overlap"),
    c("Unique Linked probes", "Distal probes without family motif signature")
  )
)
x <- t(x)
x
## Overlap No overlap
## Unique Linked probes 39 416
## Distal probes without family motif signature 517 97614
##
## Fisher's Exact Test for Count Data
##
## data: x
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 12.26892 24.92618
## sample estimates:
## odds ratio
## 17.69461
# Subfamily-level TF-target table with hg19-mapped coordinates; from here on
# `pairs`, `targets` and `probes` hold the subfamily-level data.
pairs <- read_csv(
  "~/paper_elmer/ESCA_analysis_hg38/escc_vs_esad_hg38/hyper/getTFtargets_genomic_coordinates_mapped_to_hg19.hyper.subfamily.csv",
  col_types = readr::cols()
)
# Rows whose TF column mentions HNF4A.
targets <- pairs[grep("HNF4A", pairs$TF), ]

# Probe ranges from the hg19 probe coordinate columns, other columns kept as
# metadata; duplicated ranges are dropped.
probes <- makeGRangesFromDataFrame(
  targets,
  seqnames.field = "probe_hg19_seqnames",
  start.field = "probe_hg19_start",
  end.field = "probe_hg19_end",
  strand.field = "probe_hg19_strand",
  keep.extra.columns = TRUE
)
probes <- unique(probes)
probes
## GRanges object with 203 ranges and 19 metadata columns:
## seqnames ranges strand | GeneID
## <Rle> <IRanges> <Rle> | <character>
## [1] chr6 [45979349, 45979350] * | ENSG00000001561
## [2] chr17 [37034884, 37034885] * | ENSG00000002834
## [3] chr7 [95546508, 95546509] * | ENSG00000004799
## [4] chr7 [95546539, 95546540] * | ENSG00000004799
## [5] chr7 [95957175, 95957176] * | ENSG00000004799
## ... ... ... ... . ...
## [199] chr7 [ 27173480, 27173481] * | ENSG00000254369
## [200] chr2 [ 62797901, 62797902] * | ENSG00000266097
## [201] chr2 [ 62797865, 62797866] * | ENSG00000266097
## [202] chr2 [ 62797926, 62797927] * | ENSG00000266097
## [203] chr10 [126211892, 126211893] * | ENSG00000278831
## Probe Symbol Sides Raw.p FDR
## <character> <character> <character> <numeric> <numeric>
## [1] cg16474725 ENPP4 R6 1.270122e-21 1.939775e-20
## [2] cg27061889 LASP1 R1 7.105825e-20 7.827895e-19
## [3] cg09136052 PDK4 L2 8.477625e-16 4.870098e-15
## [4] cg18450582 PDK4 L2 8.477625e-16 4.870098e-15
## [5] cg10694598 PDK4 L5 8.477625e-16 4.870098e-15
## ... ... ... ... ... ...
## [199] cg17285448 HOXA-AS3 R2 3.201199e-06 5.089675e-06
## [200] cg08573315 MIR5192 L10 8.319520e-08 1.670574e-07
## [201] cg27227250 MIR5192 L10 8.319520e-08 1.670574e-07
## [202] cg04036593 MIR5192 L10 8.319520e-08 1.670574e-07
## [203] cg16426764 AL513190.1 R3 9.809833e-06 1.445882e-05
## distNearestTSS DMC_analysis_pvalue DMC_analysis_escc_Minus_esad
## <integer> <numeric> <numeric>
## [1] 118379 2.576139e-23 0.3499644
## [2] 4741 1.656185e-36 0.3364163
## [3] 320704 1.948822e-21 0.3318075
## [4] 320735 6.976460e-20 0.3926457
## [5] 731371 8.877982e-24 0.3241807
## ... ... ... ...
## [199] 3884 5.267260e-26 0.3772209
## [200] 364940 5.470409e-55 0.3887874
## [201] 364904 1.437656e-57 0.4449732
## [202] 364965 2.720400e-58 0.4903376
## [203] 100028 6.455562e-32 0.3729131
## DMC_analysis_adjust.p external_gene_name original_ensembl_gene_id
## <numeric> <character> <character>
## [1] 3.119777e-21 ENPP4 ENSG00000001561.6
## [2] 1.227075e-33 LASP1 ENSG00000002834.16
## [3] 1.817628e-19 PDK4 ENSG00000004799.7
## [4] 5.281645e-18 PDK4 ENSG00000004799.7
## [5] 1.152399e-21 PDK4 ENSG00000004799.7
## ... ... ... ...
## [199] 9.286785e-24 HOXA-AS3 ENSG00000254369.5
## [200] 6.558573e-51 MIR5192 ENSG00000266097.1
## [201] 2.708565e-53 MIR5192 ENSG00000266097.1
## [202] 7.175381e-54 MIR5192 ENSG00000266097.1
## [203] 2.425544e-29 AL513190.1 ENSG00000278831.1
## TF
## <character>
## [1] HNF4A;HNF4G;ELF3;PPARG;FOXD2;NR1H3;NR1H4;ZSCAN16
## [2] HNF4A;HNF4G;FOXA3;FOXA2;FOXD2;ELF3;NR3C2;ZSCAN16
## [3] HNF4A;HNF4G;FOXA3;FOXA2;FOXD2;NR1H3;NR1H4;NR2F2
## [4] HNF4A;HNF4G;FOXA3;FOXA2;FOXD2;NR1H3;NR1H4;NR2F2
## [5] HNF4A;HNF4G;NR2F2;PPARG;FOXD2;NR1H3;NR1H4;ZSCAN16
## ... ...
## [199] HNF4A;HNF4G;NR2F2;PPARG;NR1H3;NR1H4;TCF7L2;NR1I2;NR3C2
## [200] HNF4A;HNF4G;NR2F2;FOXD2
## [201] HNF4A;HNF4G;NR2F2;FOXD2
## [202] HNF4A;HNF4G;NR2F2;FOXD2
## [203] HNF4A;HNF4G;FOXA3;FOXA2;FOXD2;PPARG;ELF3;NR1H3;NR1H4;NR2F2;NR3C2
## probe_hg19_width gene_hg19_seqnames gene_hg19_start gene_hg19_end
## <integer> <character> <integer> <integer>
## [1] 2 chr6 46097730 46114436
## [2] 2 chr17 37026112 37078023
## [3] 2 chr7 95212811 95225803
## [4] 2 chr7 95212811 95225803
## [5] 2 chr7 95212811 95225803
## ... ... ... ... ...
## [199] 2 chr7 27169596 27195547
## [200] 2 chr2 62432961 62433052
## [201] 2 chr2 62432961 62433052
## [202] 2 chr2 62432961 62433052
## [203] 2 chr10 126311922 126312648
## gene_hg19_width gene_hg19_strand
## <integer> <character>
## [1] 16707 +
## [2] 51912 +
## [3] 12993 -
## [4] 12993 -
## [5] 12993 -
## ... ... ...
## [199] 25952 +
## [200] 92 +
## [201] 92 +
## [202] 92 +
## [203] 727 +
## -------
## seqinfo: 22 sequences from an unspecified genome; no seqlengths
# 501 bp windows centred on each unique subfamily-level probe; print them
# with only their first two metadata columns.
probes_extended_500bp <- resize(
  unique(probes),
  width = 501,
  fix = "center"
)
probes_extended_500bp[, seq_len(2)]
## GRanges object with 203 ranges and 2 metadata columns:
## seqnames ranges strand | GeneID
## <Rle> <IRanges> <Rle> | <character>
## [1] chr6 [45979099, 45979599] * | ENSG00000001561
## [2] chr17 [37034634, 37035134] * | ENSG00000002834
## [3] chr7 [95546258, 95546758] * | ENSG00000004799
## [4] chr7 [95546289, 95546789] * | ENSG00000004799
## [5] chr7 [95956925, 95957425] * | ENSG00000004799
## ... ... ... ... . ...
## [199] chr7 [ 27173230, 27173730] * | ENSG00000254369
## [200] chr2 [ 62797651, 62798151] * | ENSG00000266097
## [201] chr2 [ 62797615, 62798115] * | ENSG00000266097
## [202] chr2 [ 62797676, 62798176] * | ENSG00000266097
## [203] chr10 [126211642, 126212142] * | ENSG00000278831
## Probe
## <character>
## [1] cg16474725
## [2] cg27061889
## [3] cg09136052
## [4] cg18450582
## [5] cg10694598
## ... ...
## [199] cg17285448
## [200] cg08573315
## [201] cg27227250
## [202] cg04036593
## [203] cg16426764
## -------
## seqinfo: 22 sequences from an unspecified genome; no seqlengths
## [1] 501
# Overlap peaks (query) with the subfamily-level probe windows (subject) and
# keep the indices of the windows that were hit.
hits <- suppressWarnings(
  findOverlaps(peaks.gr, probes_extended_500bp)
)
probes.hit <- subjectHits(hits)
# Number of distinct windows overlapped by a peak.
length(unique(probes_extended_500bp[probes.hit]))
## [1] 34
# 2 x 2 contingency table of peak overlap: subfamily-level linked probes
# (row 1) versus the distal-probe background (row 2). `distal.probes` and
# `distal.probes.hit` come from code earlier in the script.
x <- rbind(
  "Unique Linked probes" = c(
    "Overlap" = length(unique(probes[probes.hit])),
    "No overlap" = length(unique(probes)) - length(unique(probes[probes.hit]))
  ),
  "Distal probes" = c(
    "Overlap" = length(unique(distal.probes.hit)),
    "No overlap" = length(distal.probes) - length(unique(distal.probes.hit))
  )
)
x
## Overlap No overlap
## Unique Linked probes 34 169
## Distal probes 1063 162513
##
## Fisher's Exact Test for Count Data
##
## data: x
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 20.51252 44.95357
## sample estimates:
## odds ratio
## 30.75854
# Background restricted to distal probe windows whose probe carries at least
# one enriched subfamily motif (`probes.subfamily`).
distal.probes.subfamily.motif <- distal.probes.extended_500bp[
  names(distal.probes.extended_500bp) %in% probes.subfamily
]
hits.distal.subfamily.motif <- suppressWarnings(
  findOverlaps(peaks.gr, distal.probes.subfamily.motif)
)
distal.probes.subfamily.motif.hit <- subjectHits(hits.distal.subfamily.motif)

# Contingency table: linked probes vs. distal probes with a subfamily motif.
x <- rbind(
  "Unique Linked probes" = c(
    "Overlap" = length(unique(probes[probes.hit])),
    "No overlap" = length(unique(probes)) - length(unique(probes[probes.hit]))
  ),
  "Distal probes with subfamily motif signature" = c(
    "Overlap" = length(unique(distal.probes.subfamily.motif.hit)),
    "No overlap" = length(distal.probes.subfamily.motif) -
      length(unique(distal.probes.subfamily.motif.hit))
  )
)
x
## Overlap No overlap
## Unique Linked probes 34 169
## Distal probes with subfamily motif signature 297 17743
##
## Fisher's Exact Test for Count Data
##
## data: x
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 7.913615 17.798224
## sample estimates:
## odds ratio
## 12.01189
# Background restricted to distal probe windows WITHOUT any enriched
# subfamily motif. The mask is built from the names of the object being
# subset, exactly as the "with subfamily motif" step does, so mask and ranges
# are guaranteed to be aligned.
distal.probes.subfamily.nomotif <- distal.probes.extended_500bp[
  !names(distal.probes.extended_500bp) %in% probes.subfamily
]
hits.distal.subfamily.nomotif <- suppressWarnings(
  findOverlaps(peaks.gr, distal.probes.subfamily.nomotif)
)
distal.probes.subfamily.nomotif.hit <- subjectHits(hits.distal.subfamily.nomotif)

# Contingency table: linked probes vs. distal probes without a subfamily
# motif. Filled column-wise, then transposed so that each group is a row.
x <- matrix(
  c(
    # linked probes overlapping a peak
    length(unique(probes[probes.hit])),
    # linked probes not overlapping a peak
    length(unique(probes)) - length(unique(probes[probes.hit])),
    # background probes overlapping a peak
    length(unique(distal.probes.subfamily.nomotif.hit)),
    # background probes not overlapping a peak
    length(distal.probes.subfamily.nomotif) - length(unique(distal.probes.subfamily.nomotif.hit))
  ),
  nrow = 2,
  dimnames = list(
    c("Overlap", "No overlap"),
    c("Unique Linked probes", "Distal probes without subfamily motif signature")
  )
)
x <- t(x)
x
## Overlap No overlap
## Unique Linked probes 34 169
## Distal probes without subfamily motif signature 766 144770
##
## Fisher's Exact Test for Count Data
##
## data: x
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 25.31216 55.70002
## sample estimates:
## odds ratio
## 38.01406
# Name the probe windows by probe ID and select them in the row order of
# `targets`, so every heatmap built from `targets` / `p` shares that order.
names(probes_extended_500bp) <- probes_extended_500bp$Probe
p <- probes_extended_500bp[targets$Probe]

# Peak signal (column "peak") around each window: 5 kb on either side,
# summarised in 50 bp bins with the "w0" averaging mode.
mat1 <- normalizeToMatrix(
  peaks.gr,
  p,
  value_column = "peak",
  extend = 5000,
  mean_mode = "w0",
  w = 50
)

set.seed(1234)
chip.heatmap <- EnrichedHeatmap(
  mat1,
  # Options tried and left disabled:
  #km = 2, # Kmeans cluster
  #row_title_rot = 0,
  #cluster_rows = TRUE,
  #top_annotation = HeatmapAnnotation(enriched = anno_enriched(gp = gpar(col = 2:4, lty = 1:3))),
  #split = sample(c("A", "B"), 455, replace = TRUE),
  name = "HNF4A TF",
  col = c("white", "red")
)
# Gene abundance table for sample Eso26siHNF4AB1.
exp <- read_tsv(
  "/hdd/ESCA/QD320C/QD320C_results_hg38_gencodev28/stringtieFPKM/Eso26siHNF4AB1-SA06722_S7_L002_1Aligned.sortedByCoord.out.gene_abund.txt",
  col_types = readr::cols()
)
# Keep only the part of each gene ID before the first "." so the IDs can be
# matched against `targets$GeneID`.
exp$`Gene ID` <- vapply(
  exp$`Gene ID`,
  function(id) unlist(stringr::str_split(id, "\\."))[1],
  character(1)
)
# One row per target, in target order (NA rows where a gene is absent).
exp <- exp[match(targets$GeneID, exp$`Gene ID`), ]
exp.heatmap <- Heatmap(
  log2(exp$TPM + 1),
  name = "Targets log2(TPM + 1) - Eso26siHNF4AB1",
  col = c("white", "orange"),
  width = unit(5, "mm"),
  show_row_names = FALSE
)
# Gene abundance table for sample Eso26siHNF4AB2.
exp2 <- read_tsv(
  "/hdd/ESCA/QD320C/QD320C_results_hg38_gencodev28/stringtieFPKM/Eso26siHNF4AB2-SA06723_S8_L002_1Aligned.sortedByCoord.out.gene_abund.txt",
  col_types = readr::cols()
)
# Keep only the part of each gene ID before the first "." so the IDs can be
# matched against `targets$GeneID`.
exp2$`Gene ID` <- vapply(
  exp2$`Gene ID`,
  function(id) unlist(stringr::str_split(id, "\\."))[1],
  character(1)
)
# Look the targets up in THIS table's gene IDs. Matching against
# exp$`Gene ID` (the other replicate, already reordered to target order)
# returns row positions that are meaningless for exp2 and would plot
# expression values of unrelated genes.
exp2 <- exp2[match(targets$GeneID, exp2$`Gene ID`), ]
exp.heatmap2 <- Heatmap(
  log2(exp2$TPM + 1),
  col = c("white", "orange"),
  name = "Targets log2(TPM + 1) - Eso26siHNF4AB2",
  show_row_names = FALSE,
  width = unit(5, "mm")
)
# White-to-red colour ramp spanning the observed range of the methylation
# difference (escc - esad) of the selected probes.
meth_col_fun <- circlize::colorRamp2(
  range(p$DMC_analysis_escc_Minus_esad),
  c("white", "red")
)
dna.met.heatmap <- Heatmap(
  p$DMC_analysis_escc_Minus_esad,
  col = meth_col_fun,
  name = "Diff DNA met (escc - esad)",
  width = unit(5, "mm")
)

# Probe-gene pair significance on a -log10 scale.
pair.pvalue.heatmap <- Heatmap(
  -log10(p$FDR),
  name = "-log10(Pair FDR)",
  width = unit(5, "mm")
)

# Draw all tracks side by side; rows follow the order of `targets`.
chip.heatmap +
  exp.heatmap +
  exp.heatmap2 +
  dna.met.heatmap +
  pair.pvalue.heatmap