For the TCGA-BRCA data set, we will perform the DMC analysis using both a two-tailed t-test and one-tailed t-tests, and compare the results.
library(ELMER)
# Reload TCGA BRCA dataset
# NOTE(review): presumably this .rda file supplies the `mae` object passed to
# get.diff.meth() further down -- confirm against the file's contents.
load("~/paper_elmer/mae_BRCA_hg38_450K_no_ffpe.rda")
# Run get.diff.meth() for one test direction. Only `diff.dir` changes between
# the three analyses; the groups, the subgroup fraction and the `sig.dif`
# threshold are identical, so they are fixed here in a single place.
run_diff_meth <- function(direction) {
  get.diff.meth(
    mae,
    diff.dir = direction,
    minSubgroupFrac = 1,
    group.col = "definition",
    group1 = "Primary solid Tumor",
    group2 = "Solid Tissue Normal",
    sig.dif = 0.3
  )
}

# One-tailed test: probes hypomethylated in group1 vs group2
Hypo.probe <- run_diff_meth("hypo")

# One-tailed test: probes hypermethylated in group1 vs group2
Hyper.probe <- run_diff_meth("hyper")

# Two-tailed test: probes differentially methylated in group1 vs group2
diff.probe <- run_diff_meth("both")

# Show the three result tables
Hypo.probe
Hyper.probe
diff.probe
One tailed test - Hypermethylated probes
One tailed test - Hypomethylated probes
Two tailed test - Differently methylated probes
# Count the significant probes reported by each test and show them as a
# one-column table. `check.names = FALSE` keeps data.frame() from running the
# labels through make.names(); without it the spaces and brackets are replaced
# by dots, and those mangled names become the row names after t().
df <- t(data.frame("Hypomethylated probes [one-tailed]" = length(Hypo.probe$probe),
                   "Hypermethylated probes [one-tailed]" = length(Hyper.probe$probe),
                   "Differently methylated probes [two-tailed]" = length(diff.probe$probe),
                   check.names = FALSE))
colnames(df) <- "# probes"
as.data.frame(df)
Here we check that the probes identified by the two-tailed t-test are the same as those identified by the two one-tailed t-tests.
# Is every hypomethylated probe from the one-tailed test also reported by the two-tailed test?
table(is.element(Hypo.probe$probe, diff.probe$probe))
TRUE
1446
# Is every hypermethylated probe from the one-tailed test also reported by the two-tailed test?
table(is.element(Hyper.probe$probe, diff.probe$probe))
TRUE
1077
# Is every probe from the two-tailed test also reported by one of the two one-tailed tests?
# match() returns NA for a probe with no counterpart, so "no NA" means "all found".
!anyNA(match(diff.probe$probe, c(Hyper.probe$probe, Hypo.probe$probe)))
[1] TRUE
# Euclidean distance between two equally long numeric vectors, computed with
# dist() on the two vectors stacked as rows.
pair_distance <- function(a, b) {
  as.numeric(dist(rbind(a, b)))
}

# Rows of the two-tailed result selected with the one-tailed probe columns as
# the row index, so each vector pair below is aligned probe by probe.
hypo_two_tailed <- diff.probe[Hypo.probe$probe, ]
hyper_two_tailed <- diff.probe[Hyper.probe$probe, ]

distances <- c(
  pair_distance(hypo_two_tailed$pvalue, Hypo.probe$pvalue),
  pair_distance(hypo_two_tailed$adjust.p, Hypo.probe$adjust.p),
  pair_distance(hyper_two_tailed$pvalue, Hyper.probe$pvalue),
  pair_distance(hyper_two_tailed$adjust.p, Hyper.probe$adjust.p)
)

# One-column table: one labelled row per comparison
df <- matrix(
  distances,
  ncol = 1,
  dimnames = list(
    c("eucledian distance pvalue hypomethylated probes [two-tailed vs one tailed test]",
      "eucledian distance pvalue adjusted hypomethylated probes [two-tailed vs one tailed test]",
      "eucledian distance pvalues hypermethylated probes [two-tailed vs one tailed test]",
      "eucledian distance pvalues adjusted hypermethylated probes [two-tailed vs one tailed test]"),
    "distance"
  )
)
as.data.frame(df)
The plots below show that the difference between the raw p-values for the significant probes is very small and would not affect which probes were selected as significant.
# Raw p-values, hypo direction: pair every probe of the one-tailed table with
# the row of the two-tailed table that carries the same probe ID.
idx <- match(one_tailed_hypo$probe, two_tailed$probe)
data <- data.frame(
  probe = one_tailed_hypo$probe,
  x = one_tailed_hypo$pvalue,
  significant = as.factor(ifelse(one_tailed_hypo$probe %in% Hypo.probe$probe,
                                 "Significant", "Insignificant")),
  y = abs(two_tailed$pvalue[idx] - one_tailed_hypo$pvalue)
)

# Keep probes whose one-tailed p-value is below 0.5 (which() also drops NA),
# then put both axes on the -log10 scale.
data <- transform(data[which(data$x < 0.5), ], x = -log10(x), y = -log10(y))

ggplot(data, aes(x = x, y = y, shape = significant, color = significant)) +
  geom_point() +
  # Draw the significant probes a second time so they sit on top of the rest;
  # the layer inherits the plot-level aesthetics.
  geom_point(data = data[data$significant == "Significant", ]) +
  theme_bw() +
  scale_color_manual(name = "Probe identified as:",
                     values = c("Significant" = "#ff0000",
                                "Insignificant" = "#000000")) +
  scale_shape_manual(name = "Probe identified as:",
                     values = c("Significant" = 17,
                                "Insignificant" = 16)) +
  labs(title = "Comparing one-tailed (hypo direction) and two-tailed raw-pvalues",
       subtitle = "Showing only probes raw p-value < 0.5.",
       y = "-log10(Difference of raw p-values Two-tailed t-test - One tailed t-test)",
       x = "-log10(Raw P-value - One tailed t-test)")
# Raw p-values, hyper direction: pair every probe of the one-tailed table with
# the row of the two-tailed table that carries the same probe ID.
idx <- match(one_tailed_hyper$probe, two_tailed$probe)
data <- data.frame(
  probe = one_tailed_hyper$probe,
  x = one_tailed_hyper$pvalue,
  significant = as.factor(ifelse(one_tailed_hyper$probe %in% Hyper.probe$probe,
                                 "Significant", "Insignificant")),
  y = abs(two_tailed$pvalue[idx] - one_tailed_hyper$pvalue)
)

# Keep probes whose one-tailed p-value is below 0.5 (which() also drops NA),
# then put both axes on the -log10 scale.
data <- transform(data[which(data$x < 0.5), ], x = -log10(x), y = -log10(y))

ggplot(data, aes(x = x, y = y, shape = significant, color = significant)) +
  geom_point() +
  # Draw the significant probes a second time so they sit on top of the rest;
  # the layer inherits the plot-level aesthetics.
  geom_point(data = data[data$significant == "Significant", ]) +
  theme_bw() +
  scale_color_manual(name = "Probe identified as:",
                     values = c("Significant" = "#ff0000",
                                "Insignificant" = "#000000")) +
  scale_shape_manual(name = "Probe identified as:",
                     values = c("Significant" = 17,
                                "Insignificant" = 16)) +
  labs(title = "Comparing one-tailed (hyper direction) and two-tailed raw-pvalues",
       subtitle = "Showing only probes raw p-value < 0.5.",
       y = "-log10(Difference of raw p-values Two-tailed t-test - One tailed t-test)",
       x = "-log10(Raw P-value - One tailed t-test)")
# Adjusted p-values, hypo direction: for every probe of the one-tailed table,
# take the absolute difference to the adjusted p-value of the same probe in
# the two-tailed table (rows paired through the probe ID).
data <- data.frame(probe = one_tailed_hypo$probe,
                   x = one_tailed_hypo$adjust.p,
                   significant = as.factor(ifelse(one_tailed_hypo$probe %in% Hypo.probe$probe,
                                                  "Significant", "Insignificant")),
                   y = abs(two_tailed[match(one_tailed_hypo$probe, two_tailed$probe), ]$adjust.p -
                             one_tailed_hypo$adjust.p))
# Keep probes with a one-tailed adjusted p-value below 0.5, then move both
# axes to the -log10 scale.
data <- subset(data, x < 0.5)
data$y <- -log10(data$y)
data$x <- -log10(data$x)
ggplot(data, aes(x = x,
                 y = y,
                 shape = significant,
                 color = significant)) +
  geom_point() +
  # Second layer redraws the significant probes so they are plotted on top.
  geom_point(data = subset(data, significant == "Significant"),
             aes(x = x,
                 y = y,
                 color = significant)) +
  theme_bw() +
  scale_color_manual(values = c("Significant" = "#ff0000",
                                "Insignificant" = "#000000"),
                     name = "Probe identified as:") +
  scale_shape_manual(values = c("Significant" = 17,
                                "Insignificant" = 16),
                     name = "Probe identified as:") +
  # Vertical marker at the 0.01 cut-off on the -log10 x axis
  geom_vline(xintercept = -log10(0.01), linetype = "dashed", color = "blue") +
  # The y axis is built from `adjust.p`, so the label says "adjusted" (it
  # previously read "raw", which did not match the plotted values).
  labs(title = "Comparing one-tailed (hypo direction) and two-tailed adjusted-pvalues",
       subtitle = "Showing only probes adjusted p-value < 0.5. Dashed blue line: 0.01 cut-off",
       y = "-log10(|Difference of adjusted p-values Two-tailed t-test - One tailed t-test|)",
       x = "-log10(Adjusted P-value - One tailed t-test)")
# Adjusted p-values, hyper direction: for every probe of the one-tailed table,
# take the absolute difference to the adjusted p-value of the same probe in
# the two-tailed table (rows paired through the probe ID).
data <- data.frame(probe = one_tailed_hyper$probe,
                   x = one_tailed_hyper$adjust.p,
                   significant = as.factor(ifelse(one_tailed_hyper$probe %in% Hyper.probe$probe,
                                                  "Significant", "Insignificant")),
                   y = abs(two_tailed[match(one_tailed_hyper$probe, two_tailed$probe), ]$adjust.p -
                             one_tailed_hyper$adjust.p))
# Keep probes with a one-tailed adjusted p-value below 0.5, then move both
# axes to the -log10 scale.
data <- subset(data, x < 0.5)
data$y <- -log10(data$y)
data$x <- -log10(data$x)
ggplot(data, aes(x = x,
                 y = y,
                 shape = significant,
                 color = significant)) +
  geom_point() +
  # Second layer redraws the significant probes so they are plotted on top.
  geom_point(data = subset(data, significant == "Significant"),
             aes(x = x,
                 y = y,
                 color = significant)) +
  theme_bw() +
  scale_color_manual(values = c("Significant" = "#ff0000",
                                "Insignificant" = "#000000"),
                     name = "Probe identified as:") +
  scale_shape_manual(values = c("Significant" = 17,
                                "Insignificant" = 16),
                     name = "Probe identified as:") +
  # Vertical marker at the 0.01 cut-off on the -log10 x axis
  geom_vline(xintercept = -log10(0.01), linetype = "dashed", color = "blue") +
  # The y axis is built from `adjust.p`, so the label says "adjusted" (it
  # previously read "raw", which did not match the plotted values).
  labs(title = "Comparing one-tailed (hyper direction) and two-tailed adjusted-pvalues",
       subtitle = "Showing only probes adjusted p-value < 0.5. Dashed blue line: 0.01 cut-off",
       y = "-log10(|Difference of adjusted p-values Two-tailed t-test - One tailed t-test|)",
       x = "-log10(Adjusted P-value - One tailed t-test)")
# Smallest and largest tumor-minus-normal value among the two-tailed probes
range(diff.probe[["Primary.solid.Tumor_Minus_Solid.Tissue.Normal"]])
[1] -0.4839175 0.5459320
# Smallest and largest tumor-minus-normal value among the one-tailed hyper probes
range(Hyper.probe[["Primary.solid.Tumor_Minus_Solid.Tissue.Normal"]])
[1] 0.3000309 0.5459320
# Smallest and largest tumor-minus-normal value among the one-tailed hypo probes
range(Hypo.probe[["Primary.solid.Tumor_Minus_Solid.Tissue.Normal"]])
[1] -0.4839175 -0.3000173