require(dplyr)
require(ggplot2)
require(RColorBrewer)
require(reshape2)
require(tidyr)
require(igraph) # for the clustering analysis
# require(compositions) # needed for the clr function
## the compositions package is only needed if you want to use Lovells functions (mainly if you want to calculate the logratio variance)
source(file = "../Functions/ProportionalityFuncs.R")
source(file = "../Functions/MemoryFuncs.R")
I did not follow and question all steps here (such as removing not complete cases), important is, in the end of the Getting the Data section you have:
RNA.seq <- read.csv("./data/RNA.seq.csv", header=T, skip=1)
microarray <- read.csv("./data/microarray.csv", header=T, skip=3)
names(microarray) <- sub("T", "timepoint", names(microarray))
go <- read.csv("./data/Complex_annotation", header=T, sep="\t")
names(go)[6] <- "Systematic.name"
tmp <- data.frame(Systematic.name=RNA.seq$Systematic.name,
RNA.seq=rowSums(
RNA.seq[,grep("MM[12].*cpc.*", names(RNA.seq))],
na.rm=TRUE
)/2
)
# Drop any mRNAs that have a zero count in the RNA-seq
tmp <- subset(tmp, RNA.seq > 0)
# Do an inner join of Abs and the microarray data based on the Systematic names
tmp <- merge(tmp, microarray, by="Systematic.name")
# Now use the relative abundances at each microarray timepoint to multiply
# the initial mRNA copies per cell. Remove any rows that contain NAs
multipliers <- as.matrix(tmp[, grep("timepoint", names(tmp))])
Abs <- data.frame(tmp$RNA.seq * multipliers)
rownames(Abs) <- tmp[,"Systematic.name"]
Abs <- na.omit(Abs) # nice function to keep only complete cases
# Remember the convention, Abs has now the samples/timepoints as columns
Abs.t <- as.data.frame(t(Abs))
# neither Abs nor Abs.t is tidy, an observation would be the abundance of an mRNA at a timepoint
Rel <- sweep(Abs,2,colSums(Abs, na.rm=TRUE),"/")
AT <- Rel
If you want logratio variance, just uncomment. A bit on the procedure and what I changed:
# ptm <- proc.time() # in case you want to time
OTUs <- rownames(AT) # or if there is an OTU column you have to remove it
AT.clr <- mutate_each(AT, funs(clrSelf))
rownames(AT.clr) <- OTUs
AT.clr_t <- as.data.frame(t(AT.clr))
AT.sma <- sma.df.Short(AT.clr_t)
# sma because it beta is the standardised major axis slope!
# # ## IF YOU ALSO WANT THE P VALUE OF THE SLOPE, YOU HAVE TO USE
# AT.sma <- sma.df(AT.clr_t)
# AT.sma$corre <- sign(AT.sma$b)*sqrt(AT.sma$r2)
#
Thetas <- 1 + (AT.sma$b)^2 - 2*abs(AT.sma$b)*AT.sma$corre
ThetasAbs <- 1 + (AT.sma$b)^2 -2*AT.sma$b*AT.sma$corre
# which is the same as
# ThetaAbs2 <- 1 + (AT.sma$b)^2 -2*abs(AT.sma$b)*abs(AT.sma$corre)
# ## IF YOU WANT LOGRATIO VARIANCES, YOU SHOULD USE THE COMPOSITIONS PACKAGE
# # unless you figure out how to vectorize (sapply sucked in time, see
# # 150618_JustThetaAndMore)
# # LOOK up the vlr function, I assume no zeros allowed!
# LogVarRatios <- variation(acomp(t(AT)))
# proc.time() - ptm # timer off
He just looks at the lower triangle of the theta matrix, though it is not symmetric. In principle it is still fine, because for the proportional ones theta is almost symmetric. You will see, I anyway considered all the data, I copied his approach with the lt (lower triangle) and added the upper triangle (simply by transposing the lt). So that way I detect pairs in which theta is below 0.05 only for the X/Y or Y/X combination.
lt <- which(col(AT.sma$b)<row(AT.sma$b),arr.ind=FALSE)
# col(matrix) just puts the numbers of the column in the matrix
# try:
# x <- matrix(1:9,3,3)
# col(x)
# which(col(x)<row(x))
# which(col(x)<row(x), arr.ind = T)
# so lt is just an integer vector referring to the lower left triangle of
# the Theta, or b matrix
lt.ind <- which(col(AT.sma$b)<row(AT.sma$b),arr.ind=TRUE)
# now he composes a large data frame
# I find growing the dataframe incrementally helps, as does getting rid
# of any objects that aren't used from here on in
AT.sma.df <- data.frame(
row=factor(rownames(AT.sma$b)[lt.ind[,"row"]]),
col=factor(colnames(AT.sma$b)[lt.ind[,"col"]])
)
AT.sma.df$b <- AT.sma$b[lt] # stores all b values of the lower triangle
#AT.sma.df$p <- AT.sma$p[lt]
#AT.sma.df$r2 <- AT.sma$r2[lt]
#AT.sma.df$vlr <- AT.vlr[lt]
AT.sma.df$cor <- AT.sma$corre[lt]
AT.sma.df$theta <- ThetasAbs[lt]
AT.sma.df$thetaUT <- t(ThetasAbs)[lt]
rm(AT.sma, lt, lt.ind)
AT.sma.lo.phi <- filter(AT.sma.df, (theta < 0.05) | (thetaUT < 0.05))
I love the memory function, see the source
lsos()
## Type Size Rows Columns
## AT.sma.df data.frame 184068360 4591965 6
## Thetas matrix 73884104 3031 3031
## ThetasAbs matrix 73884104 3031 3031
## Abs data.frame 848320 3031 16
## AT.clr_t data.frame 729024 16 3031
## Abs.t data.frame 729024 16 3031
## AT data.frame 584304 3031 16
## AT.clr data.frame 584304 3031 16
## AT.sma.lo.phi data.frame 410920 529 6
## OTUs character 194024 3031 NA
#AT.sma.df is huge, so might want to
rm(AT.sma.df)
these plots will give you an impression whether the pairs you found in the relative data are really proporional or inversely proportional
NB: the algorithm (theta equation) is based on clr data, so log data. It makes use of the fact that both proportional and inversely proprotional rel. abundance data after log transformation lies on linear lines with slope 1 or -1 respectively. I.e. slope (beta) = 1 and -1, and also cor is 1 and -1, respectively.
Short proof for the inversely proportional data (for the proportional I have done it elsewhere and it is easier.)
\[ X*Y = C ~~~~ C = \text{constant}\] \[ Y = 1/X * C \] \[ \log{Y} = \log{1/X} + \log{C} \] \[ \log{Y} = \log{C} + (-1)*\log{X}\] so when X and Y are inversely proporitonal their logarithms are on a straight line with slope -1 and intercept \(\log{C}\).
So if you would find pairs in the relative data that are not well proportional or inversely proportional something would be wrong with the algorithm
Very unlikely
## Separation into proportional and inverse proportional
AT.sma.lo.phi.pro <- filter(AT.sma.lo.phi, b >0)
AT.sma.lo.phi.inv <- filter(AT.sma.lo.phi, b <0)
# How many OTUs are in proportional associations?
# you could use the stack function on AT.sma.lo.phi, but because it ignores
# factors, you had to use as character first, so it is easier to use
# the following my.pairs
my.pairs <- cbind(as.character(AT.sma.lo.phi$row), as.character(AT.sma.lo.phi$col)) # if you want to do it with select, make SURE you change to character.
my.pairs.pro <- cbind(as.character(AT.sma.lo.phi.pro$row), as.character(AT.sma.lo.phi.pro$col))
my.pairs.inv <- cbind(as.character(AT.sma.lo.phi.inv$row), as.character(AT.sma.lo.phi.inv$col))
length(unique(c(my.pairs))) # tells you how many OTUs are in at least one proportional association,
## [1] 267
AT_t <- as.data.frame(t(AT))
# # plot all:
# if (nrow(my.pairs) > 0) {
# my.data <- stack.pairs(AT_t, my.pairs)
# # already a nice long format data.frame
# my.data$timepoint <- factor(rep(1:ncol(AT),nrow(my.pairs)))
# ggplot(data=my.data, aes(x=x, y=y, group=group)) +
# geom_path(aes(colour=group)) +
# labs(x=expression(OTU[1]), y=expression(OTU[2])) +
# #scale_colour_discrete(name=expression(paste(OTU[1], " vs.", OTU[2]))) +
# theme(legend.position="none")
#
# ggplot(data=my.data, aes(x=x, y=y, group=group)) +
# geom_path(aes(colour=group)) +
# labs(x=expression(mRNA[1]), y=expression(mRNA[2])) +
# #scale_colour_discrete(name=expression(paste(mRNA[1], " vs.", mRNA[2]))) +
# theme(legend.position="none") +
# scale_x_log10() +
# scale_y_log10() +
# coord_equal()
# }
# plot only proportionals:
if (nrow(my.pairs.pro) > 0) {
my.data.pro <- stack.pairs(AT_t, my.pairs.pro)
# already a nice long format data.frame
my.data.pro$timepoint <- factor(rep(1:ncol(AT),nrow(my.pairs.pro)))
Tr <- ggplot(data=my.data.pro, aes(x=x, y=y, group=group)) +
geom_path(aes(colour=group)) +
labs(x=expression(OTU[1]), y=expression(OTU[2])) +
#scale_colour_discrete(name=expression(paste(OTU[1], " vs.", OTU[2]))) +
theme(legend.position="none")
Tr1 <- ggplot(data=my.data.pro, aes(x=x, y=y, group=group)) +
geom_path(aes(colour=group)) +
labs(x=expression(mRNA[1]), y=expression(mRNA[2])) +
#scale_colour_discrete(name=expression(paste(mRNA[1], " vs.", mRNA[2]))) +
theme(legend.position="none") +
scale_x_log10() +
scale_y_log10() +
coord_equal()
}
Tr
Tr1
# plot only inversely proportionals:
if (nrow(my.pairs.inv) > 0) {
my.data.inv <- stack.pairs(AT_t, my.pairs.inv)
# already a nice long format data.frame
my.data.inv$timepoint <- factor(rep(1:ncol(AT),nrow(my.pairs.inv)))
Tr <- ggplot(data=my.data.inv, aes(x=x, y=y, group=group)) +
geom_path(aes(colour=group)) +
labs(x=expression(OTU[1]), y=expression(OTU[2])) +
#scale_colour_discrete(name=expression(paste(OTU[1], " vs.", OTU[2]))) +
theme(legend.position="none")
Tr1 <- ggplot(data=my.data.inv, aes(x=x, y=y, group=group)) +
geom_path(aes(colour=group)) +
labs(x=expression(mRNA[1]), y=expression(mRNA[2])) +
#scale_colour_discrete(name=expression(paste(mRNA[1], " vs.", mRNA[2]))) +
theme(legend.position="none") +
scale_x_log10() +
scale_y_log10() +
coord_equal()
}
Tr
Tr1
You found pairs of proportional and inversely proporitonal OTUs, They might be clustered (OTU1 is prop to OTU2, OTU2 also to OTU4 and so on).
The igraph package allows you to find these clusters, I did not work myself into package, so it is mostly copied. It works fine
Here I only plot the relative abundances for clusters with a minimum cluster size. So again, these will be proportional or inversely proportional otherwise the algorithm would be wrong.
#needs the igraph package
set.seed(2) # there is some randomness in the plot
g <- graph.data.frame(AT.sma.lo.phi, directed=FALSE)
# NB: the function only uses the row and col column, but you should keep at least theta, see in the following commented out plot there is some weighting with it.
# # a plot that would simply show you clusters, not saying much.
# plot(
# g,
# layout=layout.fruchterman.reingold.grid(g, weight=0.05/E(g)$theta),
# vertex.size=1,
# vertex.color="black",
# vertex.label=NA
# )
g.clust <- clusters(g) # also from the igraph package
#g.clust is a list with the entries membership, csize and no
g.df <- data.frame(
Name=V(g)$name,
cluster=g.clust$membership,
cluster.size=g.clust$csize[g.clust$membership]
)
# g.df is a data frame, it contains first the names of the OTUs that are in a proportional association
# (so nrow(g.df) should correspond with the number from above), in which cluster they are and how many OTUs are in that cluster.
# it might be worth being printed
#g.df
# save uncut g.df for autohpagy research
MinClusterSize <- 8
g.df <- filter(g.df, cluster.size > MinClusterSize-1)
ClusterToPlot <- unique(g.df$cluster)
TrList <- list()
counter = 0
for (i in ClusterToPlot) {
counter <- counter + 1
ClOTUs <- as.character(g.df$Name[g.df$cluster == i])
dfPlot <- stack(AT_t, select=ClOTUs)
colnames(dfPlot) <- c("relAbund", 'OTU')
dfPlot$Sample <- rep(1:ncol(AT),g.df$cluster.size[g.df$cluster ==i][1])
Tr <- ggplot(dfPlot, aes(x = Sample, y = relAbund, colour = OTU)) +
geom_path() +
scale_y_log10()
TrList[[counter]] = Tr
}
TrList
## [[1]]
##
## [[2]]
##
## [[3]]
Here I also plot in comparison to the absolute abundances. Should be no problem for absolute proportionals. And I plot each cluster twice, second time log(Abundance).
set.seed(2) # there is some randomness in the plot
g.pro <- graph.data.frame(AT.sma.lo.phi.pro, directed=FALSE)
g.pro.clust <- clusters(g.pro)
g.pro.df <- data.frame(
Name=V(g.pro)$name,
cluster=g.pro.clust$membership,
cluster.size=g.pro.clust$csize[g.pro.clust$membership]
)
head(g.pro.df,15)
## Name cluster cluster.size
## 1 SPBC543.02c 1 3
## 2 SPAC22H12.04c 2 116
## 3 SPBC19F8.08 2 116
## 4 SPBC21C3.15c 3 5
## 5 SPAC6B12.09 2 116
## 6 SPCPB16A4.04c 2 116
## 7 SPAC1F7.12 4 2
## 8 SPAC3H1.06c 5 2
## 9 SPAC18G6.14c 2 116
## 10 SPAC31G5.03 2 116
## 11 SPAC3A12.10 2 116
## 12 SPAC521.05 2 116
## 13 SPAC644.15 2 116
## 14 SPAC6G9.09c 2 116
## 15 SPAC9G1.03c 2 116
MinClusterSize <- 8
g.pro.df <- filter(g.pro.df, cluster.size > MinClusterSize-1)
ClusterToPlot <- unique(g.pro.df$cluster)
TrproList <- list()
counter = 0
for (i in ClusterToPlot) {
counter <- counter + 1
ClOTUs <- as.character(g.pro.df$Name[g.pro.df$cluster == i])
dfPlot <- stack(AT_t, select=ClOTUs)
dfPlot$Sample <- rep(1:ncol(AT),g.pro.df$cluster.size[g.pro.df$cluster ==i][1])
dfPlot$Type <- 'relative'
dfPlotAbs <- stack(Abs.t, select=ClOTUs)
dfPlotAbs$Sample <- rep(1:ncol(AT),g.pro.df$cluster.size[g.pro.df$cluster ==i][1])
dfPlotAbs$Type <- 'absolute'
dfPlot <- rbind(dfPlot, dfPlotAbs)
colnames(dfPlot)[1:2] <- c("Abundance", 'OTU')
Tr <- ggplot(dfPlot, aes(x = Sample, y = Abundance, colour = OTU)) +
geom_path() +
facet_grid(Type ~., scales = "free")
TrproList[[counter]] = Tr
counter = counter + 1
Tr <- Tr + scale_y_log10()
TrproList[[counter]] = Tr
}
TrproList
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
In addition I plot the clusters in association, i.e. the proportional pairs against each other as above for the entire data, but here in comparison to the absolute data
TrproList <- list()
CorProList <- list()
counter = 0
for (i in ClusterToPlot) {
counter = counter + 1
ClOTUs <- as.character(g.pro.df$Name[g.pro.df$cluster == i])
SelectPairs <- matrix(my.pairs.pro %in% ClOTUs, nrow(my.pairs.pro),
ncol(my.pairs.pro))
# Seems one would have been enough, row or columns
SelectPairs <- my.pairs.pro[SelectPairs[,1] == TRUE & SelectPairs[,2] == TRUE,]
my.data.pro <- stack.pairs(AT_t, SelectPairs)
# already a nice long format data.frame
my.data.pro$timepoint <- factor(rep(1:ncol(AT),nrow(SelectPairs)))
my.data.pro$Type <- 'relative'
my.data.pro.abs <- stack.pairs(Abs.t, SelectPairs)
my.data.pro.abs$timepoint <- factor(rep(1:ncol(AT),nrow(SelectPairs)))
my.data.pro.abs$Type <- 'absolute'
my.data.pro <- rbind(my.data.pro, my.data.pro.abs)
Tr <- ggplot(data=my.data.pro, aes(x=x, y=y, colour=group)) +
geom_path() +
labs(x=expression(OTU[1]), y=expression(OTU[2])) +
facet_wrap(~ Type, scales = "free") +
theme(legend.position="none")
TrproList[[counter]] <- Tr
CorRel <- numeric(nrow(SelectPairs))
CorAbs <- numeric(nrow(SelectPairs))
for (e in 1:nrow(SelectPairs)) {
CorRel[e] <- cor(AT_t[,colnames(AT_t)==SelectPairs[e,1]],
AT_t[,colnames(AT_t)==SelectPairs[e,2]])
CorAbs[e] <- cor(Abs.t[,colnames(AT_t)==SelectPairs[e,1]],
Abs.t[,colnames(AT_t)==SelectPairs[e,2]])
}
CorProList[[counter]] <- data.frame(row = SelectPairs[,1],
col = SelectPairs[,2],
Rel = CorRel,
Abs = CorAbs)
counter <- counter + 1
Tr <- Tr +
scale_x_log10() +
scale_y_log10() +
coord_equal()
TrproList[[counter]] <- Tr
CorRel <- numeric(nrow(SelectPairs))
CorAbs <- numeric(nrow(SelectPairs))
for (e in 1:nrow(SelectPairs)) {
CorRel[e] <- cor(log(AT_t[,colnames(AT_t)==SelectPairs[e,1]]),
log(AT_t[,colnames(AT_t)==SelectPairs[e,2]]))
CorAbs[e] <- cor(log(Abs.t[,colnames(AT_t)==SelectPairs[e,1]]),
log(Abs.t[,colnames(AT_t)==SelectPairs[e,2]]))
}
CorProList[[counter]] <- data.frame(row = SelectPairs[,1],
col = SelectPairs[,2],
Rel = CorRel,
Abs = CorAbs)
}
for (i in 1:length(TrproList)) {
print(TrproList[[i]])
print(head(CorProList[[i]],10))
}
## row col Rel Abs
## 1 SPAC22H12.04c SPAC1071.07c 0.9704640 0.9928387
## 2 SPBC19F8.08 SPAC1071.07c 0.9852984 0.9983288
## 3 SPAC6B12.09 SPAC10F6.10 0.9692772 0.9953621
## 4 SPCPB16A4.04c SPAC10F6.10 0.9696391 0.9974263
## 5 SPAC18G6.14c SPAC13G6.02c 0.9682165 0.9933528
## 6 SPAC31G5.03 SPAC13G6.02c 0.9578292 0.9947453
## 7 SPAC3A12.10 SPAC13G6.02c 0.9894862 0.9982237
## 8 SPAC521.05 SPAC13G6.02c 0.9825753 0.9990985
## 9 SPAC644.15 SPAC13G6.02c 0.9923702 0.9991513
## 10 SPAC6G9.09c SPAC13G6.02c 0.9306675 0.9955962
## row col Rel Abs
## 1 SPAC22H12.04c SPAC1071.07c 0.9861254 0.9945893
## 2 SPBC19F8.08 SPAC1071.07c 0.9793847 0.9938050
## 3 SPAC6B12.09 SPAC10F6.10 0.9806938 0.9846665
## 4 SPCPB16A4.04c SPAC10F6.10 0.9767767 0.9799772
## 5 SPAC18G6.14c SPAC13G6.02c 0.9806453 0.9903803
## 6 SPAC31G5.03 SPAC13G6.02c 0.9797884 0.9887802
## 7 SPAC3A12.10 SPAC13G6.02c 0.9783663 0.9890688
## 8 SPAC521.05 SPAC13G6.02c 0.9846948 0.9895258
## 9 SPAC644.15 SPAC13G6.02c 0.9888088 0.9855606
## 10 SPAC6G9.09c SPAC13G6.02c 0.9743296 0.9930005
## row col Rel Abs
## 1 SPAC2E1P3.03c SPAC19D5.09c 0.9983848 0.9997834
## 2 SPAC9.04 SPAC19D5.09c 0.9987617 0.9998971
## 3 SPBC1289.17 SPAC19D5.09c 0.9993006 0.9998295
## 4 SPBC1E8.04 SPAC19D5.09c 0.9994121 0.9997411
## 5 SPCC1494.11c SPAC19D5.09c 0.9991882 0.9999118
## 6 SPAC9.04 SPAC2E1P3.03c 0.9991236 0.9997372
## 7 SPBC1289.17 SPAC2E1P3.03c 0.9984798 0.9999075
## 8 SPBC1E8.04 SPAC2E1P3.03c 0.9983695 0.9998668
## 9 SPCC1494.11c SPAC2E1P3.03c 0.9990796 0.9998995
## 10 SPNCRNA.1056 SPAC2E1P3.03c 0.9981839 0.9994390
## row col Rel Abs
## 1 SPAC2E1P3.03c SPAC19D5.09c 0.9910678 0.9963638
## 2 SPAC9.04 SPAC19D5.09c 0.9934031 0.9970653
## 3 SPBC1289.17 SPAC19D5.09c 0.9953342 0.9982706
## 4 SPBC1E8.04 SPAC19D5.09c 0.9961045 0.9982650
## 5 SPCC1494.11c SPAC19D5.09c 0.9939850 0.9975420
## 6 SPAC9.04 SPAC2E1P3.03c 0.9955767 0.9979794
## 7 SPBC1289.17 SPAC2E1P3.03c 0.9918664 0.9966562
## 8 SPBC1E8.04 SPAC2E1P3.03c 0.9921696 0.9966259
## 9 SPCC1494.11c SPAC2E1P3.03c 0.9953442 0.9980359
## 10 SPNCRNA.1056 SPAC2E1P3.03c 0.9843470 0.9949199
Here it is getting exciting, does it make sense to look for invesely propotionals in relative abundacne data, is there a correlation in the absolute abundance data at all???
I fear not
set.seed(2) # there is some randomness in the plot
g.inv <- graph.data.frame(AT.sma.lo.phi.inv, directed=FALSE)
# # a plot that would simply show you clusters, not saying much.
# plot(
# g.inv,
# layout=layout.fruchterman.reingold.grid(g.inv, weight=0.05/E(g.inv)$thetaAbs),
# vertex.size=1,
# vertex.color="black",
# vertex.label=NA
# )
g.inv.clust <- clusters(g.inv) # also from the igraph package
#g.inv.clust is a list with the entries membership, csize and no
g.inv.df <- data.frame(
Name=V(g.inv)$name,
cluster=g.inv.clust$membership,
cluster.size=g.inv.clust$csize[g.inv.clust$membership]
)
# g.df is a data frame, it contains first the names of the OTUs that are in a proportional association (should correspond with the number from above), in which cluster they are and how many OTUs are in that cluster.
# it is worth being printed
head(g.inv.df,15)
## Name cluster cluster.size
## 1 SPAC27E2.09 1 2
## 2 SPAC4F10.20 2 2
## 3 SPAC6G9.08 3 2
## 4 SPCC576.15c 4 2
## 5 SPNCRNA.727 5 3
## 6 SPSNORNA.10 5 3
## 7 SPNCRNA.491 6 4
## 8 SPSNORNA.54 6 4
## 9 SPBC543.02c 7 3
## 10 SPCC306.09c 8 2
## 11 SPBC28F2.04c 9 2
## 12 SPBC24C6.02 10 2
## 13 SPBP16F5.03c 11 2
## 14 SPNCRNA.445 12 2
## 15 SPBC2A9.05c 13 2
MinClusterSize <- 4
g.inv.df <- filter(g.inv.df, cluster.size > MinClusterSize-1)
ClusterToPlot <- unique(g.inv.df$cluster)
TrInvList <- list()
counter = 0
for (i in ClusterToPlot) {
counter <- counter + 1
ClOTUs <- as.character(g.inv.df$Name[g.inv.df$cluster == i])
dfPlot <- stack(AT_t, select=ClOTUs)
dfPlot$Sample <- rep(1:ncol(AT),g.inv.df$cluster.size[g.inv.df$cluster ==i][1])
dfPlot$Type <- 'relative'
dfPlotAbs <- stack(Abs.t, select=ClOTUs)
dfPlotAbs$Sample <- rep(1:ncol(AT),g.inv.df$cluster.size[g.inv.df$cluster ==i][1])
dfPlotAbs$Type <- 'absolute'
dfPlot <- rbind(dfPlot, dfPlotAbs)
colnames(dfPlot)[1:2] <- c("Abundance", 'OTU')
Tr <- ggplot(dfPlot, aes(x = Sample, y = Abundance, colour = OTU)) +
geom_path() +
facet_grid(Type ~., scales = "free")
TrInvList[[counter]] = Tr
counter = counter + 1
Tr <- Tr + scale_y_log10()
TrInvList[[counter]] = Tr
}
TrInvList
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
TrInvList <- list()
CorInvList <- list()
counter = 0
for (i in ClusterToPlot) {
counter = counter + 1
ClOTUs <- as.character(g.inv.df$Name[g.inv.df$cluster == i])
SelectPairs <- matrix(my.pairs.inv %in% ClOTUs, nrow(my.pairs.inv),
ncol(my.pairs.inv))
# Seems one would have been enough, row or columns
SelectPairs <- my.pairs.inv[SelectPairs[,1] == TRUE & SelectPairs[,2] == TRUE,]
my.data.Inv <- stack.pairs(AT_t, SelectPairs)
# already a nice long format data.frame
my.data.Inv$timepoint <- factor(rep(1:ncol(AT),nrow(SelectPairs)))
my.data.Inv$Type <- 'relative'
my.data.Inv.abs <- stack.pairs(Abs.t, SelectPairs)
my.data.Inv.abs$timepoint <- factor(rep(1:ncol(AT),nrow(SelectPairs)))
my.data.Inv.abs$Type <- 'absolute'
my.data.Inv <- rbind(my.data.Inv, my.data.Inv.abs)
Tr <- ggplot(data=my.data.Inv, aes(x=x, y=y, colour=group)) +
geom_path() +
labs(x=expression(OTU[1]), y=expression(OTU[2])) +
facet_wrap(~ Type, scales = "free") +
theme(legend.position="none")
TrInvList[[counter]] <- Tr
CorRel <- numeric(nrow(SelectPairs))
CorAbs <- numeric(nrow(SelectPairs))
for (e in 1:nrow(SelectPairs)) {
CorRel[e] <- cor(AT_t[,colnames(AT_t)==SelectPairs[e,1]],
AT_t[,colnames(AT_t)==SelectPairs[e,2]])
CorAbs[e] <- cor(Abs.t[,colnames(AT_t)==SelectPairs[e,1]],
Abs.t[,colnames(AT_t)==SelectPairs[e,2]])
}
CorInvList[[counter]] <- data.frame(row = SelectPairs[,1],
col = SelectPairs[,2],
Rel = CorRel,
Abs = CorAbs)
counter <- counter + 1
Tr <- Tr +
scale_x_log10() +
scale_y_log10() +
coord_equal()
TrInvList[[counter]] <- Tr
CorRel <- numeric(nrow(SelectPairs))
CorAbs <- numeric(nrow(SelectPairs))
for (e in 1:nrow(SelectPairs)) {
CorRel[e] <- cor(log(AT_t[,colnames(AT_t)==SelectPairs[e,1]]),
log(AT_t[,colnames(AT_t)==SelectPairs[e,2]]))
CorAbs[e] <- cor(log(Abs.t[,colnames(AT_t)==SelectPairs[e,1]]),
log(Abs.t[,colnames(AT_t)==SelectPairs[e,2]]))
}
CorInvList[[counter]] <- data.frame(row = SelectPairs[,1],
col = SelectPairs[,2],
Rel = CorRel,
Abs = CorAbs)
}
for (i in 1:length(TrInvList)) {
print(TrInvList[[i]])
print(head(CorInvList[[i]],10))
}
## row col Rel Abs
## 1 SPNCRNA.491 SPAC19G12.15c -0.8029015 0.7547520
## 2 SPSNORNA.54 SPAC19G12.15c -0.8707235 0.7299556
## 3 SPSNORNA.54 SPAC637.06 -0.8279560 0.7546649
## row col Rel Abs
## 1 SPNCRNA.491 SPAC19G12.15c -0.9356963 0.6886016
## 2 SPSNORNA.54 SPAC19G12.15c -0.9628782 0.7417726
## 3 SPSNORNA.54 SPAC637.06 -0.9448771 0.7599803
## row col Rel Abs
## 1 SPCC4G3.17 SPBC13E7.10c -0.9012894 0.9095629
## 2 SPBP8B7.11 SPBC337.12 -0.8372574 0.6966663
## 3 SPCC4G3.17 SPBC337.12 -0.8603512 0.7795014
## 4 SPNCRNA.1436 SPCC4G3.17 -0.8680173 0.7579243
## row col Rel Abs
## 1 SPCC4G3.17 SPBC13E7.10c -0.9434212 0.9743674
## 2 SPBP8B7.11 SPBC337.12 -0.9487379 0.8918364
## 3 SPCC4G3.17 SPBC337.12 -0.9267393 0.9158434
## 4 SPNCRNA.1436 SPCC4G3.17 -0.9132845 0.9026285
# # Return the row minimum of the lower triangle of X
# lt.row.min <- function(X){
# result <- numeric(nrow(X) - 1) # just defines a vector with zeros of
# # length: nrow(X) -1
# for(i in 2:nrow(X)){
# result[i-1] <- min(X[i, 1:i-1])
# }
# result
# }
#
#
# # Find the row minimum of the lower triangle of phi
# AT.phi.min <- lt.row.min(Thetas)
# # AT.phi.min is a vector, storing the minimal theta in each row (for each
# # species/mRNA) of the lower left triangle (draw it)
#
# AT.phi.min.sort <- sort(AT.phi.min)
#
# cum <- seq(from=0,to=1,along.with=AT.phi.min.sort)
# ggplot(data=data.frame(AT.phi.min=AT.phi.min.sort,cum=cum), aes(x=AT.phi.min, y=cum)) +
# geom_line() +
# labs(x=expression(paste("min( ", phi, "(clr(", x[i], "), clr(", x[j], ") ) ")), y="cumulative distribution") +
# coord_cartesian(xlim = c(0,0.05))
# rm(Rel.phi.min.sort, cum)