Click here for other works of the author on RPubs
Import data set enviANDdensity
and extract density data of fish and copepod from the data set
library(knitr)
enviANDdensity <- read.table("enviANDdensity.txt", header = T)
fish_density=as.matrix(enviANDdensity[,11])
copepod_density=as.matrix(enviANDdensity[,12])
kable(head(cbind(fish_density,copepod_density)),digits=2,col.names = c("Fish density","Copepod density"),align="l",caption="Fish and copepod density data")
Fish density | Copepod density |
---|---|
137.32 | 1119 |
20.97 | 1153 |
0.00 | 1719 |
0.00 | 855 |
180.71 | 1246 |
88.35 | 2123 |
Create function boot.samp
to generate bootstrap samples for a given sample and a statistic of interest
boot.samp <- function(samp,n,stat){
FUN=match.fun(stat)
#create and randomize n sets of bootstrap samples
size=length(samp)
shuffle=rep(samp,n)[order(runif(size*n))]
#split bootstrap samples and calculate the statistic of interest for each set
bt=apply(matrix(shuffle,size,n),2,FUN)
#return bootstrap samples
return(bt)
}
Create function boot.result
to report the estimated statistic and its standard error.
boot.result <- function(bt.samp,origin.mean=NULL){
#calculate the estimated parameter and SE for bootstrap
result=t(as.matrix(cbind(mean(bt.samp),sd(bt.samp))))
# If mean is the statistic of interest, calculate SE using normal theory
if(!is.null(origin.mean)){
origin=t(as.matrix(cbind(mean(origin.mean),sd(origin.mean)/sqrt(length(origin.mean)))))
result=cbind(origin,result)
colnames(result) <- c("Normal theory","Bootstrap")
rownames(result) <- c("Estimated parameter","Estimated standard error")
}
else{
colnames(result) <- c("Bootstrap")
rownames(result) <- c("Estimated parameter","Estimated standard error")
}
return(result)
}
Create function boot.plot
to plot the bootstrap samples with a histogram
boot.plot <- function(bt.samp,p.title="Bootstrap histogram",hist.break=50){
#plot the histogram of bootstrap samples along with a smoothed density curve and 95% confidence interval
hist(bt.samp,breaks=hist.break,col="gray",main=p.title,prob=T,xlab=NULL)
lines(density(bt.samp),col="blue",lwd=2)
abline(v=sort(bt.samp)[length(bt.samp)*c(0.025, 0.975)],col="red",lwd=2)
legend("topright",lty=c(1,1),legend=c("Smoothed density","95% CI"),col=c("blue","red"),lwd=2)
}
Bootstrap samples are generated using the function created above. Sample size is set to 1000.
fish_mean_bt=boot.samp(fish_density,1000,mean)
copepod_mean_bt=boot.samp(copepod_density,1000,mean)
boot.fish.mean=boot.result(fish_mean_bt,fish_density)
kable(boot.fish.mean,digits=2,caption="Comparison of normal theory v.s bootstrap for fish density mean",align="l")
Normal theory | Bootstrap | |
---|---|---|
Estimated parameter | 322.45 | 322.45 |
Estimated standard error | 61.23 | 60.20 |
boot.plot(fish_mean_bt,"Bootstrap samples of fish density mean")
boot.copepod.mean=boot.result(copepod_mean_bt,copepod_density)
kable(boot.copepod.mean,digits=2,caption="Comparison of normal theory v.s bootstrap for copepod density mean",align="l")
Normal theory | Bootstrap | |
---|---|---|
Estimated parameter | 1972.37 | 1972.37 |
Estimated standard error | 342.55 | 348.35 |
boot.plot(copepod_mean_bt,"Bootstrap samples of copepod density mean")
Bootstrap sampls are generated using the function created above. Sample size is set to 1000.
fish_median_bt=boot.samp(fish_density,1000,median)
copepod_median_bt=boot.samp(copepod_density,1000,median)
boot.fish.median=boot.result(fish_median_bt)
kable(boot.fish.median,digits=2,caption="Bootstrap result for fish density median",align="l")
Bootstrap | |
---|---|
Estimated parameter | 175.01 |
Estimated standard error | 97.27 |
boot.plot(fish_median_bt,"Bootstrap samples of fish density median")
boot.copepod.median=boot.result(copepod_median_bt)
kable(boot.copepod.median,digits=2,caption="Bootstrap result for copepod density median",align="l")
Bootstrap | |
---|---|
Estimated parameter | 1383.70 |
Estimated standard error | 197.27 |
boot.plot(copepod_median_bt,"Bootstrap samples of copepod density median")
# set dependent variable
Y=fish_density
# set intercept and independent variable
X=cbind(rep(1,length(copepod_density)),copepod_density)
# compute coefficients
B = solve(t(X) %*% X) %*% (t(X) %*% Y)
# plot fish v.s copepod
plot(copepod_density,fish_density,xlab="Copepod density",ylab="Fish density",pch=16,col="blue")
# add the regression line
x=seq(min(copepod_density),max(copepod_density),length=100)
lines(x,B[1]+B[2]*x,col="red",lwd=2)
legend("bottomright",pch=c(16,NA),lty=c(0,1),legend=c("Actual","Predicted"),col=c("blue","red"),lwd=2)
Create function lm.boot
to generate bootstrap samples for coefficients in a linear regression model for given dependent and independent variables
lm.boot <- function(dv,iv,n){
B.sample=numeric()
for(i in 1:n){
#generate bootstrap samplea for coefficients in a linear regression model
size=length(dv)
samp=as.matrix(cbind(dv,iv))[ceiling(runif(size,0,size)),]
Y=samp[,1]
X=cbind(rep(1,length(samp[,1])),samp[,-1])
beta = solve(t(X) %*% X) %*% (t(X) %*% Y)
B.sample=append(B.sample,beta)
}
#return bootstrap samples for the coefficients
return(t(matrix(B.sample,dim(samp)[2],n)))
}
Conduct bootstrap on coefficients \(B_0\), \(B_1\) and report results using the lm.boot
and boot.result
functions created above
lm.bt=lm.boot(fish_density,copepod_density,1000)
result.lm.B0=boot.result(lm.bt[,1])
kable(result.lm.B0,digits=2,caption="Bootstrap result of coefficient B0",align="l")
Bootstrap | |
---|---|
Estimated parameter | 84.99 |
Estimated standard error | 61.79 |
boot.plot(lm.bt[,1],"Histogram for bootstrap samples of coefficient B0")
result.lm.B1=boot.result(lm.bt[,2])
kable(result.lm.B1,digits=2,caption="Bootstrap result of coefficient B1",align="l")
Bootstrap | |
---|---|
Estimated parameter | 0.12 |
Estimated standard error | 0.03 |
boot.plot(lm.bt[,2],"Histogram for bootstrap samples of coefficient B1")