Lab 9-2021

Task 1

getwd()

## [1] "C:/Users/Caleb/Documents/Civil Engineering degree coursework/Applied Statistical Methods/Labs/Lab 9"

Task 2

Line A takes a sample of size n*iter from x with replacement. Line B creates a \(100(1-\alpha)\%\) confidence interval using xstat.

Replacement is required because the sample itself has only n values. Thus, if you resample it with a size of n without replacement then you will just end up with the original sample again.

With Replacement

set.seed(35) # This will give everyone the same sample
sam=round(rnorm(20,mean=10,sd=4),2)
unique(sample(sam,20,replace=TRUE) )

##  [1] 12.05 23.35 11.40 14.26  8.43 13.46  2.37  8.62 17.35 11.64 14.76  9.82

unique(sample(sam,20,replace=TRUE) )

##  [1]  8.43 14.26  7.75  2.89 12.05  2.37  9.82 13.46  6.93 10.53 23.35 11.78
## [13]  8.62 11.40  9.86

unique(sample(sam,20,replace=TRUE) )

##  [1]  9.86 10.53 14.26 17.35 12.05  6.93 23.35  8.62 11.64 16.69 11.40

unique(sample(sam,20,replace=TRUE) )

##  [1] 11.64  9.82  8.62 17.35 11.78 11.40  7.75  2.37 23.35  2.89

unique(sample(sam,20,replace=TRUE) )

##  [1]  7.75 14.76 16.69 17.35 14.26  8.62 11.40  8.43 12.05  2.89 10.53 23.35

# repeat this line 5Xs

Each time it ran it output a different series of values. Although they don’t output 20 values which I would’ve expected.

Without Relacement

set.seed(35) # This will give everyone the same sample
sam=round(rnorm(20,mean=10,sd=4),2)
unique(sample(sam,20,replace=FALSE) )

##  [1] 12.05 23.35 11.40 17.35 14.26  8.43  8.62  2.89 16.69 10.53  8.00  6.93
## [13] 11.64  9.86  7.75 13.46 14.76  2.37  9.82 11.78

unique(sample(sam,20,replace=FALSE) )

##  [1]  8.43 14.76  9.82 16.69 11.64 14.26  7.75 12.05  8.00  8.62  2.37 10.53
## [13] 23.35 11.40 11.78  2.89  6.93 17.35  9.86 13.46

unique(sample(sam,20,replace=FALSE) )

##  [1]  2.89  9.86 10.53 14.26 17.35 12.05 11.40  6.93 23.35  2.37  8.62 11.78
## [13]  8.00 11.64 14.76  9.82 13.46 16.69  7.75  8.43

unique(sample(sam,20,replace=FALSE) )

##  [1] 11.64 17.35 23.35 11.40 16.69  9.82  2.89 10.53  9.86 13.46  8.43  7.75
## [13]  8.00 14.76 11.78  8.62 12.05  2.37  6.93 14.26

unique(sample(sam,20,replace=FALSE) )

##  [1] 11.78 16.69  2.37  8.62  7.75 12.05 14.76  9.82  9.86  8.00  2.89 14.26
## [13] 10.53 11.40  8.43 13.46 23.35 17.35 11.64  6.93

# repeat this line 5Xs

Each one has all of the same values. Also they all contain 20 values.

#sample(sam,21,replace=FALSE)

It gives an error because it can’t make a sample larger than the population.

Task 3

myboot2<-function(iter=10000,x,fun="mean",alpha=0.05,cx=1.5,...){  #Notice where the ... is repeated in the code
n=length(x)   #sample size

y=sample(x,n*iter,replace=TRUE)
rs.mat=matrix(y,nr=n,nc=iter,byrow=TRUE)
xstat=apply(rs.mat,2,fun) # xstat is a vector and will have iter values in it 
ci=quantile(xstat,c(alpha/2,1-alpha/2))# Nice way to form a confidence interval
# A histogram follows
# The object para will contain the parameters used to make the histogram
para=hist(xstat,freq=FALSE,las=1,
main=paste("Histogram of Bootstrap sample statistics","\n","alpha=",alpha," iter=",iter,sep=""),
...)

#mat will be a matrix that contains the data, this is done so that I can use apply()
mat=matrix(x,nr=length(x),nc=1,byrow=TRUE)

#pte is the point estimate
#This uses whatever fun is
pte=apply(mat,2,fun)
abline(v=pte,lwd=3,col="Black")# Vertical line
segments(ci[1],0,ci[2],0,lwd=4)      #Make the segment for the ci
text(ci[1],0,paste("(",round(ci[1],2),sep=""),col="Red",cex=cx)
text(ci[2],0,paste(round(ci[2],2),")",sep=""),col="Red",cex=cx)

# plot the point estimate 1/2 way up the density
text(pte,max(para$density)/2,round(pte,2),cex=cx)

invisible(list(ci=ci,fun=fun,x=x))# Some output to use if necessary
}
set.seed(39); sam=rnorm(25,mean=25,sd=10)
a=myboot2(iter=10000,sam,fun="mean",alpha=0.05)

qnorm(0.5,25,10)

## [1] 25

set.seed(30); sam=rchisq(20,df=3)
b=myboot2(iter=10000,sam,fun="mean",alpha=0.05)

qchisq(0.5,df=3)

## [1] 2.365974

set.seed(40); sam=rgamma(30,shape=2,scale=3)
c=myboot2(iter=10000,sam,fun="mean",alpha=0.05)

qgamma(0.5,shape=2,scale=3)

## [1] 5.035041

set.seed(10); sam=rbeta(20,shape1=3,shape2=4)
d=myboot2(iter=10000,sam,fun="mean",alpha=0.05)

qbeta(0.5,shape1=3,shape2=4)

## [1] 0.4214072

They come pretty close but are still a bit off in each instance.

Every confidence interval contains the population value except for the gamma distribution.

set.seed(39); sam=rnorm(25,mean=25,sd=10)
a=myboot2(iter=10000,sam,fun="var",alpha=0.2)

set.seed(30); sam=rchisq(20,df=3)
b=myboot2(iter=10000,sam,fun="var",alpha=0.2)

set.seed(40); sam=rgamma(30,shape=2,scale=3)
c=myboot2(iter=10000,sam,fun="var",alpha=0.2)

set.seed(10); sam=rbeta(20,shape1=3,shape2=4)
d=myboot2(iter=10000,sam,fun="var",alpha=0.2)

Task 4

myboot2<-function(iter=10000,x,fun="mean",alpha=0.05,cx=1.5,...){  #Notice where the ... is repeated in the code
n=length(x)   #sample size

y=sample(x,n*iter,replace=TRUE)
rs.mat=matrix(y,nr=n,nc=iter,byrow=TRUE)
xstat=apply(rs.mat,2,fun) # xstat is a vector and will have iter values in it
barplot(xstat)
ci=quantile(xstat,c(alpha/2,1-alpha/2))# Nice way to form a confidence interval
print(ci[1])
print(ci[2])
# A histogram follows
# The object para will contain the parameters used to make the histogram
para=hist(xstat,freq=FALSE,las=1,
main=paste("Histogram of Bootstrap sample statistics","\n","alpha=",alpha," iter=",iter,sep=""),
...)

#mat will be a matrix that contains the data, this is done so that I can use apply()
mat=matrix(x,nr=length(x),nc=1,byrow=TRUE)

#pte is the point estimate
#This uses whatever fun is
pte=apply(mat,2,fun)
abline(v=pte,lwd=3,col="Black")# Vertical line
segments(ci[1],0,ci[2],0,lwd=4)      #Make the segment for the ci
text(ci[1],0,paste("(",round(ci[1],2),sep=""),col="Red",cex=cx)
text(ci[2],0,paste(round(ci[2],2),")",sep=""),col="Red",cex=cx)

# plot the point estimate 1/2 way up the density
text(pte,max(para$density)/2,round(pte,2),cex=cx)

invisible(list(ci=ci,fun=fun,x=x))# Some output to use if necessary
}
sam=c(1,1,1,2,2,2,2,3,3,3,4,4)
a=myboot2(x=sam,fun="median")

## 2.5% 
##  1.5 
## 97.5% 
##     3

I refrained from having it print out xstat since that would output close to 1000 numerica values. It can be seen by adding the line “print(xstat)” into the function. The interval estimate is (1.5,3).

Task 5

mybootci<-function(iter=10000,x,fun="mean",alpha=0.05,...){  

#Notice where the ... is repeated in the code
n=length(x)   #sample size

#Now sample with replacement
y=sample(x,n*iter,replace=TRUE) #A

# Make a matrix with all the resampled values
rs.mat=matrix(y,nr=n,nc=iter,byrow=TRUE)

xstat=apply(rs.mat,2,fun)
# xstat is a vector and will have iter values in it 
ci=quantile(xstat,c(alpha/2,1-alpha/2))
print(ci)

return(list(ci=ci,fun=fun,x=x))# Some output to use if necessary
}

mynew <- function(x){mean(x)/median(x)} 

set.seed(39); sam=rnorm(25,mean=25,sd=10)
a=mybootci(iter=10000,sam,fun=mynew,alpha=0.05)

##      2.5%     97.5% 
## 0.9392537 1.1053853

set.seed(30); sam=rchisq(20,df=3)
b=mybootci(iter=10000,sam,fun=mynew,alpha=0.05)

##      2.5%     97.5% 
## 0.8809479 1.6392163

set.seed(40); sam=rgamma(30,shape=2,scale=3)
c=mybootci(iter=10000,sam,fun=mynew,alpha=0.05)

##      2.5%     97.5% 
## 0.8693493 1.2588008

set.seed(10); sam=rbeta(20,shape1=3,shape2=4)
d=mybootci(iter=10000,sam,fun=mynew,alpha=0.05)

##      2.5%     97.5% 
## 0.9003749 1.1320829

set.seed(39); sam=rnorm(25,mean=25,sd=10)
a=mybootci(iter=10000,sam,fun=mynew,alpha=0.3)

##       15%       85% 
## 0.9798003 1.0650228

set.seed(30); sam=rchisq(20,df=3)
b=mybootci(iter=10000,sam,fun=mynew,alpha=0.3)

##      15%      85% 
## 1.016359 1.462286

set.seed(40); sam=rgamma(30,shape=2,scale=3)
c=mybootci(iter=10000,sam,fun=mynew,alpha=0.3)

##       15%       85% 
## 0.9324518 1.0941934

set.seed(10); sam=rbeta(20,shape1=3,shape2=4)
d=mybootci(iter=10000,sam,fun=mynew,alpha=0.3)

##       15%       85% 
## 0.9540283 1.0755370

Task 6

myboot2<-function(iter=10000,x,fun="mean",alpha=0.05,cx=1.5,...){  #Notice where the ... is repeated in the code
n=length(x)   #sample size

y=sample(x,n*iter,replace=TRUE)
rs.mat=matrix(y,nr=n,nc=iter,byrow=TRUE)
xstat=apply(rs.mat,2,fun) # xstat is a vector and will have iter values in it 
ci=quantile(xstat,c(alpha/2,1-alpha/2))# Nice way to form a confidence interval
# A histogram follows
# The object para will contain the parameters used to make the histogram
para=hist(xstat,freq=FALSE,las=1,
main=paste("Histogram of Bootstrap sample statistics","\n","alpha=",alpha," iter=",iter,sep=""),
...)

#mat will be a matrix that contains the data, this is done so that I can use apply()
mat=matrix(x,nr=length(x),nc=1,byrow=TRUE)

#pte is the point estimate
#This uses whatever fun is
pte=apply(mat,2,fun)
abline(v=pte,lwd=3,col="Black")# Vertical line
segments(ci[1],0,ci[2],0,lwd=4)      #Make the segment for the ci
text(ci[1],0,paste("(",round(ci[1],2),sep=""),col="Red",cex=cx)
text(ci[2],0,paste(round(ci[2],2),")",sep=""),col="Red",cex=cx)

# plot the point estimate 1/2 way up the density
text(pte,max(para$density)/2,round(pte,2),cex=cx)

invisible(list(ci=ci,fun=fun,x=x))# Some output to use if necessary
}

sam=rcauchy(20,location=0,scale=1)
a1=myboot2(iter=10000,sam,fun="mean",alpha=0.2)

a2=myboot2(iter=10000,sam,fun="var",alpha=0.2)

sam=rlnorm(20,meanlog=0,sdlog=1)
b1=myboot2(iter=10000,sam,fun="mean",alpha=0.2)

b2=myboot2(iter=10000,sam,fun="var",alpha=0.2)

sam=rgeom(20,0.7)
c1=myboot2(iter=10000,sam,fun="mean",alpha=0.2)

c2=myboot2(iter=10000,sam,fun="var",alpha=0.2)

sam=rmultinom(20, 100, 0.7)
d1=myboot2(iter=10000,sam,fun="mean",alpha=0.2)

d2=myboot2(iter=10000,sam,fun="var",alpha=0.2)

Task 7

set.seed(68)
sam=rnorm(20,mean=10,sd=4)
a=myboot2(iter=10000,sam,fun="IQR",alpha=0.05)

b=myboot2(iter=10000,sam,fun="sd",alpha=0.05)

set.seed(68)
sam=rnorm(20,mean=10,sd=4)

t.test(sam,conf.level=0.95)$conf.int

## [1]  8.818145 12.100478
## attr(,"conf.level")
## [1] 0.95

a=mybootci(iter=10000,sam,fun="mean",alpha=0.05)

##      2.5%     97.5% 
##  8.862478 11.878680

They actually compare quite nicely. The upper limits differ by about 0.3 but taking more iterations with the bootstrap code would correct that.

Task 8

fire=read.csv("FIREDAM.csv")
obj=MATH4753GRAY::myboot2(x=fire$DAMAGE)

I will try to fix this but my package wasn’t exporting the data set correstly.