Defining the dilution scheme :
(dilution <- data.frame(from=c(10,10,10,40,40,40,40,50),
to=c(90,90,90,160,160,160,160,50),
rep=c(1,0,1,2,4,4,4,2)))## from to rep
## 1 10 90 1
## 2 10 90 0
## 3 10 90 1
## 4 40 160 2
## 5 40 160 4
## 6 40 160 4
## 7 40 160 4
## 8 50 50 2
volume.fly <- 250
volume.droplet <- 5Sample are simulated with the folowing sequence of events: * for each dilution, the number of cells transferred from the source tube is randomly drawn from a Poisson distribution * the concentration in the destination tube is computed as from/(from+to) * CFU counts are randomly drawn from a Poisson distribution with average the concentration just computed multiplied by the volume of the droplet.
Proceeding this way, replicate platings of a given dilution are truly independent, but successive dilutions are not. This mimics the method we actually use in our experiments.
simulation.sample <- function(lambda,dilution,volume.fly,volume.droplet) {
nb <- nrow(dilution)
cfu <- rep(NA,sum(dilution$rep))
## number of CFU extracted from the fly (drawn from a Poisson)
n <- c(rpois(1,lambda),rep(NA,nb))
## concentration in the extraction tube
conc <- c(n[1]/volume.fly,rep(NA,nb))
nbcfu <- 0
for(i in 1:nb) {
## the number of cells transferred from source is drawn from a Poisson distribution
n[i+1] <- rpois(1,conc[i]*dilution$from[i])
## concentration in the destination tube
conc[i+1] <- n[i+1]/(dilution$from[i]+dilution$to[i])
if(dilution$rep[i]>0) {
## number of CFU obtained from the destination tube
cfu[nbcfu+1:dilution$rep[i]] <- rpois(dilution$rep[i],conc[i+1]*volume.droplet)
nbcfu <- nbcfu + dilution$rep[i]
}
}
## computes dilution factors
df <- cumprod(with(dilution,(from+to)/from))
return(data.frame(CFU=cfu,Dilution=rep(df,dilution$rep)))
}
simulation.dataset <- function(lambda,dilution,volume.fly,volume.droplet,sampleID=NULL) {
sim <- NULL
for(i in 1:length(lambda)) {
s <- simulation.sample(lambda[i],dilution,volume.fly,volume.droplet)
if(is.null(sampleID)) {
s$sampleID <- i
} else {
s$sampleID <- sampleID[i]
}
sim <- rbind(sim,s)
}
return(sim)
}Randomly draws lines : for each fly, a load is randomly drawn from a Gamma distribution with scale fixed to 1e4 and average varying from 1e4 to 1e9. These different average loads are assumed to correspond to different lines. Each fly is identified by a unique value of sampleID.
nbfly <- 50
scale <- 10^4
v.shape <- 10^(4:9)/scale
dlines <- data.frame(lambda=rgamma(nbfly*length(v.shape),shape=v.shape,scale=scale),
Line=paste0("L",rep(1:length(v.shape),nbfly)),
sampleID=paste0("L",rep(1:length(v.shape),nbfly),"_",rep(1:nbfly,each=length(v.shape))))
rownames(dlines) <- dlines$sampleID
plot(lambda~Line,data=dlines,log="y")Randomly draw CFU counts for each fly.
d <- simulation.dataset(dlines$lambda,sampleID=dlines$sampleID,dilution,volume.fly,volume.droplet)
subset(d,sampleID=="L1_1")## CFU Dilution sampleID
## 1 4 10 L1_1
## 2 0 1000 L1_1
## 3 0 5000 L1_1
## 4 0 5000 L1_1
## 5 0 25000 L1_1
## 6 0 25000 L1_1
## 7 0 25000 L1_1
## 8 0 25000 L1_1
## 9 0 125000 L1_1
## 10 0 125000 L1_1
## 11 0 125000 L1_1
## 12 0 125000 L1_1
## 13 0 625000 L1_1
## 14 0 625000 L1_1
## 15 0 625000 L1_1
## 16 0 625000 L1_1
## 17 0 1250000 L1_1
## 18 0 1250000 L1_1
subset(d,sampleID=="L2_1")## CFU Dilution sampleID
## 19 186 10 L2_1
## 20 2 1000 L2_1
## 21 1 5000 L2_1
## 22 0 5000 L2_1
## 23 0 25000 L2_1
## 24 0 25000 L2_1
## 25 0 25000 L2_1
## 26 0 25000 L2_1
## 27 0 125000 L2_1
## 28 0 125000 L2_1
## 29 0 125000 L2_1
## 30 1 125000 L2_1
## 31 0 625000 L2_1
## 32 0 625000 L2_1
## 33 0 625000 L2_1
## 34 0 625000 L2_1
## 35 0 1250000 L2_1
## 36 0 1250000 L2_1
subset(d,sampleID=="L3_1")## CFU Dilution sampleID
## 37 2181 10 L3_1
## 38 24 1000 L3_1
## 39 3 5000 L3_1
## 40 4 5000 L3_1
## 41 1 25000 L3_1
## 42 0 25000 L3_1
## 43 1 25000 L3_1
## 44 0 25000 L3_1
## 45 0 125000 L3_1
## 46 0 125000 L3_1
## 47 0 125000 L3_1
## 48 0 125000 L3_1
## 49 0 625000 L3_1
## 50 0 625000 L3_1
## 51 0 625000 L3_1
## 52 0 625000 L3_1
## 53 0 1250000 L3_1
## 54 0 1250000 L3_1
subset(d,sampleID=="L4_1")## CFU Dilution sampleID
## 55 19599 10 L4_1
## 56 218 1000 L4_1
## 57 36 5000 L4_1
## 58 34 5000 L4_1
## 59 8 25000 L4_1
## 60 9 25000 L4_1
## 61 5 25000 L4_1
## 62 10 25000 L4_1
## 63 3 125000 L4_1
## 64 2 125000 L4_1
## 65 1 125000 L4_1
## 66 0 125000 L4_1
## 67 0 625000 L4_1
## 68 0 625000 L4_1
## 69 0 625000 L4_1
## 70 0 625000 L4_1
## 71 0 1250000 L4_1
## 72 0 1250000 L4_1
subset(d,sampleID=="L5_1")## CFU Dilution sampleID
## 73 202854 10 L5_1
## 74 2043 1000 L5_1
## 75 389 5000 L5_1
## 76 392 5000 L5_1
## 77 74 25000 L5_1
## 78 90 25000 L5_1
## 79 82 25000 L5_1
## 80 82 25000 L5_1
## 81 16 125000 L5_1
## 82 21 125000 L5_1
## 83 20 125000 L5_1
## 84 16 125000 L5_1
## 85 4 625000 L5_1
## 86 3 625000 L5_1
## 87 5 625000 L5_1
## 88 1 625000 L5_1
## 89 4 1250000 L5_1
## 90 2 1250000 L5_1
subset(d,sampleID=="L6_1")## CFU Dilution sampleID
## 91 1992771 10 L6_1
## 92 19777 1000 L6_1
## 93 3920 5000 L6_1
## 94 4035 5000 L6_1
## 95 726 25000 L6_1
## 96 833 25000 L6_1
## 97 797 25000 L6_1
## 98 799 25000 L6_1
## 99 152 125000 L6_1
## 100 161 125000 L6_1
## 101 158 125000 L6_1
## 102 168 125000 L6_1
## 103 35 625000 L6_1
## 104 25 625000 L6_1
## 105 37 625000 L6_1
## 106 34 625000 L6_1
## 107 12 1250000 L6_1
## 108 17 1250000 L6_1
Check that a Poisson glm using complete data produces unbiased estimates of load. This is not warantied because the autocorrelation between successive dilutions are not accounted for in this analysis.
m <- glm(CFU~sampleID,offset=log(volume.droplet)-log(Dilution),data=d,family=poisson(link="log"))
v <- coef(m)/log(10)
v[-1] <- v[-1]+v[1]+log10(volume.fly)
names(v) <- gsub("sampleID","",names(v))
names(v)[1] <- "L1_1"
plot(log10(dlines[names(v),"lambda"]),v,xlab="log lambda",ylab ="estimated log lambda (complete data)")
abline(0,1)
abline(h=4:9,col="gray")
abline(v=4:9,col="gray")Now consider that numbers of CFU above 100 are impossible to count.
d$CFUtr <- d$CFU
d$CFUtr[d$CFU>100] <- InfWe use loadEstimate to estimate load. The three available methods will be tried, with maxCFU=50 or maxCFU=100 in the case of Truncated Poisson and Binomial-Poisson mixture.
library(CFUfit)
est1 <- loadEstimate(cfu = d$CFUtr,dilution = 1/(volume.fly*d$Dilution),volume = volume.droplet,
sampleID = d$sampleID,maxCFU=100,ignoreAboveMaxCFU=FALSE,logbase = 10)
colnames(est1)[-1] <- paste0(colnames(est1)[-1],"_BP_100")
est2 <- loadEstimate(cfu = d$CFUtr,dilution = 1/(volume.fly*d$Dilution),volume = volume.droplet,
sampleID=d$sampleID,maxCFU=50,ignoreAboveMaxCFU=FALSE,logbase = 10)
colnames(est2)[-1] <- paste0(colnames(est2)[-1],"_BP_50")
est3 <- loadEstimate(cfu = d$CFUtr,dilution = 1/(volume.fly*d$Dilution),volume = volume.droplet,
sampleID=d$sampleID,maxCFU=100,ignoreAboveMaxCFU=TRUE,logbase = 10)
colnames(est3)[-1] <- paste0(colnames(est3)[-1],"_TP_100")
est4 <- loadEstimate(cfu = d$CFUtr,dilution = 1/(volume.fly*d$Dilution),volume = volume.droplet,
sampleID=d$sampleID,maxCFU=50,ignoreAboveMaxCFU=TRUE,logbase = 10)
colnames(est4)[-1] <- paste0(colnames(est4)[-1],"_TP_50")
l <- is.finite(d$CFUtr)
est5 <- loadEstimate(cfu = d$CFUtr[l],dilution = 1/(volume.fly*d$Dilution[l]),volume = volume.droplet,
sampleID=d$sampleID[l],maxCFU=NA,logbase = 10)
colnames(est5)[-1] <- paste0(colnames(est5)[-1],"_P_100")
# merge with dlines
est <- cbind(dlines, est1[match(dlines$sampleID,est1$sampleID),]
,est2[match(dlines$sampleID,est2$sampleID),]
,est3[match(dlines$sampleID,est3$sampleID),]
,est4[match(dlines$sampleID,est4$sampleID),]
,est5[match(dlines$sampleID,est5$sampleID),])Plotting estimates against actual values.
plot(logload_BP_100~log10(lambda),data=est,
main="Binomial-Poisson mixture",
xlab="log lambda",ylab="estimated log lambda (truncated data)")
abline(0,1)
abline(lm(logload_BP_100~log10(lambda),data=est),col="indianred4")
abline(h=4:9,col="gray")
abline(v=4:9,col="gray")plot(logload_TP_100~log10(lambda),data=est,
main="Truncated Poisson",
xlab="log lambda",ylab="estimated log lambda (truncated data)")
abline(0,1)
abline(lm(logload_TP_100~log10(lambda),data=est),col="indianred4")
abline(h=4:9,col="gray")
abline(v=4:9,col="gray")plot(logload_P_100~log10(lambda),data=est,
main="Poisson",
xlab="log lambda",ylab="estimated log lambda (truncated data)")
abline(0,1)
abline(lm(logload_P_100~log10(lambda),data=est),col="indianred4")
abline(h=4:9,col="gray")
abline(v=4:9,col="gray")Binomial-Poisson is a bit better than Truncated Poisson and plain Poisson in terms of precision (the two latter yield low estimates for intermediate lambda). Plain Poisson sometimes fails to estimate load for the lowest \(\lambda\).
library(beanplot)
beanplot(logload_BP_100~Line,data=est,what = c(F,T,T,F),col = as.list(adjustcolor(terrain.colors(5),alpha.f = 0.75)),
log="",ylab="log10 bacteria per fly")
points(logload_BP_100~jitter(as.numeric(Line)),data=est,pch=3,
col=adjustcolor("black",alpha.f=0.5),cex=0.5)beanplot(logload_BP_50~Line,data=est,what = c(F,T,T,F),col = as.list(adjustcolor(terrain.colors(5),alpha.f = 0.75)),
log="",ylab="log10 bacteria per fly")
points(logload_BP_50~jitter(as.numeric(Line)),data=est,pch=3,
col=adjustcolor("black",alpha.f=0.5),cex=0.5)beanplot(logload_TP_100~Line,data=est,what = c(F,T,T,F),col = as.list(adjustcolor(terrain.colors(5),alpha.f = 0.75)),
log="",ylab="log10 bacteria per fly")
points(logload_TP_100~jitter(as.numeric(Line)),data=est,pch=3,
col=adjustcolor("black",alpha.f=0.5),cex=0.5)beanplot(logload_TP_50~Line,data=est,what = c(F,T,T,F),col = as.list(adjustcolor(terrain.colors(5),alpha.f = 0.75)),
log="",ylab="log10 bacteria per fly")
points(logload_TP_50~jitter(as.numeric(Line)),data=est,pch=3,
col=adjustcolor("black",alpha.f=0.5),cex=0.5)beanplot(logload_P_100~Line,data=est,what = c(F,T,T,F),col = as.list(adjustcolor(terrain.colors(5),alpha.f = 0.75)),
log="",ylab="log10 bacteria per fly")
points(logload_P_100~jitter(as.numeric(Line)),data=est,pch=3,
col=adjustcolor("black",alpha.f=0.5),cex=0.5)Increasing maxCFU increases precision. Load estimates have a multimodal distribution for the two lowest values of \(\lambda\). Changing method does not change much from that prospective…
d.mv <- data.frame(Line=unique(est$Line))
for(i in grep("^logload_",colnames(est))) {
dtmp <- data.frame(tapply(est[,i],est$Line,mean,na.rm=TRUE),
tapply(est[,i],est$Line,var,na.rm=TRUE))
colnames(dtmp) <- paste(c("mean","var"),colnames(est)[i],sep="_")
d.mv <- cbind(d.mv,dtmp)
}
d.mv$mean <- with(dlines,tapply(log10(lambda),Line,mean,na.rm=TRUE))
d.mv$var <- with(dlines,tapply(log10(lambda),Line,var,na.rm=TRUE))
plot(var~mean,data=d.mv,type="l",col="red",xlab="mean",ylab="variance")
points(var_logload_BP_100~mean_logload_BP_100,data=d.mv,type="b",pch=1)
points(var_logload_BP_50~mean_logload_BP_100,data=d.mv,type="b",pch=2)
points(var_logload_TP_100~mean_logload_BP_100,data=d.mv,type="b",pch=3)
points(var_logload_TP_50~mean_logload_BP_100,data=d.mv,type="b",pch=4)
points(var_logload_P_100~mean_logload_BP_100,data=d.mv,type="b",pch=5)The relationship between variance and mean of log estimation (which comes from the Gamma distribution of \(\lambda\)) is best preserved by the Binomial-Poisson mixture. Note that this is not true if load (instead of log load) is analyzed.