counts<- read.csv("/Users/mtang1/projects/diff_ChIP_test/results/broad_peaks_all/counts_table.csv",
header = F)
## give some meaniningful names
names(counts)<- c("peak_id", "chr", "start", "end", "strand", "length",
"subRead_Mcf7_input", "subRead_Mcf7Rep1", "subRead_Mcf7Rep2",
"subRead_Panc1_input", "subRead_Panc1Rep1", "subRead_Panc1Rep2",
"bedtools_Mcf7_input", "bedtools_Mcf7Rep1", "bedtools_Mcf7Rep2",
"bedtools_Panc1_input", "bedtools_Panc1Rep1", "bedtools_Panc1Rep2")
library(ggplot2)
ggplot(counts) + geom_point(aes(x=log2(subRead_Mcf7Rep1 +1 ), y=log2(bedtools_Mcf7Rep1 +1))) +
geom_abline(intercept = 0, slope = 1, col="red")
Subread counts and bedtools counts are very similar. see correlation below.
cor(log2(counts$subRead_Mcf7Rep1+1), log2(counts$bedtools_Mcf7Rep1+1))
## [1] 0.999985
counts between replicates:
ggplot(counts) + geom_point(aes(x=log2(subRead_Mcf7Rep1 +1 ), y=log2(subRead_Mcf7Rep2 +1))) +
geom_abline(intercept = 0, slope = 1, col="red")
ggplot(counts) + geom_point(aes(x=log2(subRead_Panc1Rep1 +1 ), y=log2(subRead_Panc1Rep2 +1))) +
geom_abline(intercept = 0, slope = 1, col="red")
counts between different cell lines
ggplot(counts) + geom_point(aes(x=log2(subRead_Mcf7Rep1 +1 ), y=log2(subRead_Panc1Rep1 +1))) +
geom_abline(intercept = 0, slope = 1, col="red")
It is more variable between cell lines. See paper Sequence and chromatin determinants of cell-type–specific transcription factor binding Fig5B.
pearson correlation of different counts
cor(log2(counts$subRead_Mcf7Rep1+1), log2(counts$subRead_Mcf7Rep2+1))
## [1] 0.9742128
cor(log2(counts$subRead_Panc1Rep1+1), log2(counts$subRead_Panc1Rep2+1))
## [1] 0.960262
cor(log2(counts$subRead_Mcf7Rep1+1), log2(counts$subRead_Panc1Rep1+1))
## [1] 0.5210859
subread_counts<- counts[,7:12]
X<- subread_counts
sv = svd(t(X))
U = sv$u
V = sv$v
D = sv$d
Z<- t(X) %*% V
cols = as.numeric(as.factor(colnames(X)))
plot(Z[,1], Z[,2], type ="n", xlab="PC1", ylab="PC2")
text(Z[,1], Z[,2], colnames(X), col=cols)
MCF7 replicates cluster together, Panc1 replicates cluster together and inputs cluster together.