Quick analysis of CCPs obtained using different methods on 100 families of Deltaproteobacteria.
# Load the per-family comparison table (one row per gene family, identified
# by the FM column) and print a summary of every column.
# NOTE(review): the working directory is a machine-specific absolute path;
# adjust it before running this analysis elsewhere.
setwd("/home/boussau/Data/TransferRelated/ComparisonCCPDavin/ComparisonCCPs")
# Spell out header = TRUE (no partial argument matching, no reassignable T).
# stringsAsFactors = TRUE keeps FM a factor on R >= 4.0, so summary()
# tabulates it the same way as when this analysis was recorded.
d <- read.table("ComparisonCCDs.tsv", header = TRUE, stringsAsFactors = TRUE)
summary(d)
FM IQDIST_D IQDIST_L IQDIST_LL IQDIST_RD IQDIST_RL
DELTA000004: 1 Min. : 0.000 Min. : 0.000 Min. :-221.439 Min. :0.000001 Min. :0.000001
DELTA000110: 1 1st Qu.: 0.000 1st Qu.: 2.252 1st Qu.: -87.649 1st Qu.:0.000001 1st Qu.:0.057327
DELTA000139: 1 Median : 0.025 Median : 5.580 Median : -48.546 Median :0.001411 Median :0.165886
DELTA000170: 1 Mean : 2.357 Mean : 7.181 Mean : -66.970 Mean :0.097253 Mean :0.202848
DELTA000341: 1 3rd Qu.: 3.110 3rd Qu.: 9.565 3rd Qu.: -29.078 3rd Qu.:0.097512 3rd Qu.:0.335495
DELTA000356: 1 Max. :24.000 Max. :43.360 Max. : -5.339 Max. :1.025130 Max. :0.633318
(Other) :94
IQDIST_RT IQDIST_T IQNEWDIST_D IQNEWDIST_L IQNEWDIST_LL IQNEWDIST_RD
Min. :0.000001 Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. :-235.182 Min. :0.000001
1st Qu.:0.023388 1st Qu.: 1.018 1st Qu.: 0.000 1st Qu.: 2.165 1st Qu.: -88.914 1st Qu.:0.000001
Median :0.076179 Median : 3.625 Median : 0.020 Median : 5.635 Median : -48.566 Median :0.002423
Mean :0.103737 Mean : 5.584 Mean : 2.319 Mean : 7.254 Mean : -66.917 Mean :0.097831
3rd Qu.:0.151390 3rd Qu.: 7.965 3rd Qu.: 3.105 3rd Qu.: 9.460 3rd Qu.: -29.093 3rd Qu.:0.097245
Max. :0.493569 Max. :24.400 Max. :24.000 Max. :36.410 Max. : -5.344 Max. :1.025130
IQNEWDIST_RL IQNEWDIST_RT IQNEWDIST_T PBCATDIST_D PBCATDIST_L PBCATDIST_LL
Min. :0.000001 Min. :0.000001 Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. :-224.285
1st Qu.:0.058323 1st Qu.:0.026169 1st Qu.: 1.020 1st Qu.: 0.000 1st Qu.: 1.700 1st Qu.: -90.931
Median :0.172801 Median :0.069065 Median : 3.475 Median : 0.345 Median : 4.655 Median : -49.464
Mean :0.205269 Mean :0.098739 Mean : 5.575 Mean : 2.261 Mean : 6.058 Mean : -65.631
3rd Qu.:0.337602 3rd Qu.:0.154671 3rd Qu.: 7.800 3rd Qu.: 3.050 3rd Qu.: 8.535 3rd Qu.: -28.155
Max. :0.635611 Max. :0.441210 Max. :28.160 Max. :24.000 Max. :30.860 Max. : -6.352
PBCATDIST_RD PBCATDIST_RL PBCATDIST_RT PBCATDIST_T PBLGCATDIST_D PBLGCATDIST_L
Min. :0.000001 Min. :0.000001 Min. :0.000001 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.:0.000001 1st Qu.:0.045078 1st Qu.:0.022838 1st Qu.: 1.040 1st Qu.: 0.000 1st Qu.: 1.950
Median :0.006233 Median :0.132756 Median :0.065688 Median : 4.110 Median : 0.000 Median : 4.365
Mean :0.094619 Mean :0.183016 Mean :0.095169 Mean : 5.058 Mean : 2.234 Mean : 6.093
3rd Qu.:0.085012 3rd Qu.:0.317036 3rd Qu.:0.134741 3rd Qu.: 7.497 3rd Qu.: 3.062 3rd Qu.: 8.357
Max. :1.028940 Max. :0.651366 Max. :0.470747 Max. :22.270 Max. :24.000 Max. :36.150
PBLGCATDIST_LL PBLGCATDIST_RD PBLGCATDIST_RL PBLGCATDIST_RT PBLGCATDIST_T
Min. :-215.031 Min. :0.0000010 Min. :0.000001 Min. :0.000001 Min. : 0.000
1st Qu.: -81.391 1st Qu.:0.0000010 1st Qu.:0.043473 1st Qu.:0.025359 1st Qu.: 1.048
Median : -48.422 Median :0.0001322 Median :0.144134 Median :0.060798 Median : 3.280
Mean : -63.497 Mean :0.0934242 Mean :0.187107 Mean :0.092349 Mean : 4.841
3rd Qu.: -28.306 3rd Qu.:0.0905567 3rd Qu.:0.320606 3rd Qu.:0.130447 3rd Qu.: 7.308
Max. : -5.402 Max. :1.0207100 Max. :0.623895 Max. :0.449569 Max. :22.690
# Paired t-test (rows paired by family) of the _T column: IQDIST vs. IQNEWDIST.
# NOTE(review): _T is presumably the inferred number of transfers -- confirm.
t.test(d$IQDIST_T, d$IQNEWDIST_T, paired = TRUE)
Paired t-test
data: d$IQDIST_T and d$IQNEWDIST_T
t = 0.046398, df = 99, p-value = 0.9631
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.3466503 0.3632503
sample estimates:
mean of the differences
0.0083
# Paired t-test (rows paired by family) of the _T column: PBCATDIST vs. IQDIST.
t.test(d$PBCATDIST_T, d$IQDIST_T, paired = TRUE)
Paired t-test
data: d$PBCATDIST_T and d$IQDIST_T
t = -2.5893, df = 99, p-value = 0.01107
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.9281945 -0.1228055
sample estimates:
mean of the differences
-0.5255
# Paired t-test (rows paired by family) of the log-likelihood (_LL) column:
# PBCATDIST vs. IQDIST.
t.test(d$PBCATDIST_LL, d$IQDIST_LL, paired = TRUE)
Paired t-test
data: d$PBCATDIST_LL and d$IQDIST_LL
t = 2.2792, df = 99, p-value = 0.0248
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.1733311 2.5053721
sample estimates:
mean of the differences
1.339352
# Paired t-test (rows paired by family) of the log-likelihood (_LL) column:
# PBCATDIST vs. PBLGCATDIST.
t.test(d$PBCATDIST_LL, d$PBLGCATDIST_LL, paired = TRUE)
Paired t-test
data: d$PBCATDIST_LL and d$PBLGCATDIST_LL
t = -3.5649, df = 99, p-value = 0.0005622
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-3.3215050 -0.9461504
sample estimates:
mean of the differences
-2.133828
# Paired t-test (rows paired by family) of the _T column:
# PBCATDIST vs. PBLGCATDIST.
t.test(d$PBCATDIST_T, d$PBLGCATDIST_T, paired = TRUE)
Paired t-test
data: d$PBCATDIST_T and d$PBLGCATDIST_T
t = 1.4351, df = 99, p-value = 0.1544
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.08303086 0.51703086
sample estimates:
mean of the differences
0.217
# Rank the four methods by log-likelihood, family by family.
# Matrix columns: 1 = IQDIST, 2 = IQNEWDIST, 3 = PBCATDIST, 4 = PBLGCATDIST.
dll <- cbind(
  d[["IQDIST_LL"]],
  d[["IQNEWDIST_LL"]],
  d[["PBCATDIST_LL"]],
  d[["PBLGCATDIST_LL"]]
)
# For every row, keep the index of each column that attains the row maximum.
# which() returns all of them, so a row with tied log-likelihoods contributes
# more than one index (and cols is then a list rather than a plain vector).
cols <- apply(dll, 1, function(row_ll) which(row_ll == max(row_ll)))
# Number of times each method is (co-)best.
table(unlist(cols))
1 2 3 4
16 18 23 44
PBLG+CAT is best in 44 cases, PB+CAT in 23, IQ TEST in 18, and IQ in 16. These counts sum to 101 rather than 100 because a family whose best log-likelihood is shared by several methods is counted once for each of them.
# Plot, for every family, which method(s) gave the best log-likelihood.
# Add extra space to the right of the plot area and change clipping to the
# figure region so the legend can be drawn beside the plot.
par(mar = c(5.1, 4.1, 4.1, 10.1), xpd = TRUE)
best_method <- unlist(cols)
# x is the family (row) index. A family with tied best methods has several
# entries in cols, so its index is repeated once per entry; numbering the
# flattened vector instead would shift every family after a tie to the right.
family_index <- rep(seq_along(cols), times = lengths(cols))
plot(
  family_index, best_method,
  col = best_method,
  ylab = "Best gene tree according to ALE dated (logLk)",
  xlab = "Delta proteobacteria family",
  pch = 20
)
# Colours 1-4 match the column order used to build cols.
legend(
  "topright",
  inset = c(-0.3, 0),
  legend = c("IQ", "IQ TEST", "PB CAT", "PBLG+CAT"),
  pch = 20,
  col = 1:4
)