Load the mosaic, Lock5Data, and gridExtra libraries
library(mosaic)
library(Lock5Data)
library(gridExtra)
set seed of random number generator so we can get the same results each time we run the code
# Fix the random-number seed so the random sampling and bootstrap
# resampling below give the same results on every run
set.seed(1500)
Read in data
# Deer capture records; the path is relative to the working directory
ddat <- read.csv("../data/Deercaptures.csv")
Let's create a smaller data set with 1/4 the number of observations (79 of the 318 cases)
# Draw a random subsample (without replacement) containing one quarter of the
# rows of ddat. The size is derived from nrow(ddat) with integer division, so
# it is always a whole number and stays correct if the data file changes
# (for the 318-row data set this is 79 rows, the same as the former 318/4).
ddatsmall <- sample(ddat, nrow(ddat) %/% 4)
Let's consider the sampling distribution of the proportion of captured deer that were fawns. Our sample proportions are similar in the full and reduced data sets.
# Observed proportion of fawns in the full data set
prop(~ ageclass, data = ddat, success = "Fawn")
## prop_Fawn
## 0.1886792
# Observed proportion of fawns in the reduced data set
prop(~ ageclass, data = ddatsmall, success = "Fawn")
## prop_Fawn
## 0.1898734
Let's create 5 different bootstrap distributions and confidence intervals using (1) the reduced data set and (2) the full data set.
I am going to use a “for loop” rather than mosaic’s “do” function since the simulation is a bit more complicated than the things we normally do in biometry.
# Set up objects to store results: one row per repetition, with the lower
# (2.5%) and upper (97.5%) percentile bounds in columns 1 and 2
confintSmall <- matrix(NA_real_, 5, 2) # confidence intervals for small sample size
confintLarge <- matrix(NA_real_, 5, 2) # confidence intervals for full data set

# 5 x 2 plotting grid: each repetition fills one row (reduced data | full data).
# Keep the previous graphical settings so they can be restored afterwards.
old_par <- par(mfrow = c(5, 2))

for (i in seq_len(nrow(confintSmall))) {
  # Bootstrap distribution (1000 resamples) and percentile CI: reduced data set
  bootsmall <- do(1000) * {
    prop(~ageclass, data = resample(ddatsmall), success = "Fawn")
  }
  # stats::quantile() is called directly so the result is a plain numeric
  # vector of length 2, independent of the shape qdata() returns in the
  # installed mosaic version
  confintSmall[i, ] <- stats::quantile(bootsmall$prop_Fawn,
                                       probs = c(0.025, 0.975), na.rm = TRUE)

  # Bootstrap distribution (1000 resamples) and percentile CI: full data set
  bootslarge <- do(1000) * {
    prop(~ageclass, data = resample(ddat), success = "Fawn")
  }
  confintLarge[i, ] <- stats::quantile(bootslarge$prop_Fawn,
                                       probs = c(0.025, 0.975), na.rm = TRUE)

  # The reduced data set holds a quarter (not half) of the observations
  hist(bootsmall$prop_Fawn, main = "Bootstrap Distribution: Quarter Data")
  hist(bootslarge$prop_Fawn, main = "Bootstrap Distribution: Full Data")
}

par(old_par) # restore the previous plotting layout
Let's look at the confidence intervals
# Each row is one of the 5 repetitions; column 1 is the 2.5% quantile (lower
# bound) and column 2 the 97.5% quantile (upper bound) of the bootstrap
# distribution of the proportion of fawns.
confintSmall # data set of 79 cases
## [,1] [,2]
## [1,] 0.1139241 0.2784810
## [2,] 0.1139241 0.2784810
## [3,] 0.1139241 0.2784810
## [4,] 0.1012658 0.2911392
## [5,] 0.1012658 0.2784810
confintLarge # data set with 318 cases
## [,1] [,2]
## [1,] 0.1446541 0.2327044
## [2,] 0.1477201 0.2327044
## [3,] 0.1446541 0.2327830
## [4,] 0.1477987 0.2327044
## [5,] 0.1446541 0.2358491
Now, let's consider creating a bootstrap distribution for the full data set, but this time we will vary the number of bootstrap resamples (either 500 or 5000).
# Storage for the percentile intervals: one row per repetition, with the lower
# (2.5%) and upper (97.5%) bounds in columns 1 and 2
confint500 <- matrix(NA_real_, 5, 2) # confidence intervals using 500 bootstraps
confint5000 <- matrix(NA_real_, 5, 2) # confidence intervals using 5000 bootstraps

# 5 x 2 plotting grid: each repetition fills one row (500 | 5000 resamples).
# Keep the previous graphical settings so they can be restored afterwards.
old_par <- par(mfrow = c(5, 2))

for (i in seq_len(nrow(confint500))) {
  # Bootstrap and CI for the full data set using 500 resamples
  boot500 <- do(500) * {
    prop(~ageclass, data = resample(ddat), success = "Fawn")
  }
  # stats::quantile() is called directly so the result is a plain numeric
  # vector of length 2, independent of the shape qdata() returns in the
  # installed mosaic version
  confint500[i, ] <- stats::quantile(boot500$prop_Fawn,
                                     probs = c(0.025, 0.975), na.rm = TRUE)

  # Bootstrap and CI for the full data set using 5000 resamples
  boot5000 <- do(5000) * {
    prop(~ageclass, data = resample(ddat), success = "Fawn")
  }
  confint5000[i, ] <- stats::quantile(boot5000$prop_Fawn,
                                      probs = c(0.025, 0.975), na.rm = TRUE)

  hist(boot500$prop_Fawn, main = "500 Bootstraps")
  hist(boot5000$prop_Fawn, main = "5000 Bootstraps")
}

par(old_par) # restore the previous plotting layout
# Each row is one of the 5 repetitions; column 1 is the 2.5% quantile (lower
# bound) and column 2 the 97.5% quantile (upper bound) of the bootstrap
# distribution of the proportion of fawns in the full data set.
confint500 # intervals from 500 bootstrap resamples
## [,1] [,2]
## [1,] 0.1477987 0.2375000
## [2,] 0.1492925 0.2327044
## [3,] 0.1540881 0.2343553
## [4,] 0.1477987 0.2280660
## [5,] 0.1492925 0.2295597
confint5000 # intervals from 5000 bootstrap resamples
## [,1] [,2]
## [1,] 0.1446541 0.2295597
## [2,] 0.1446541 0.2327044
## [3,] 0.1446541 0.2327044
## [4,] 0.1477987 0.2327044
## [5,] 0.1477987 0.2327044