Please note: All answers have been rounded to four decimal places.
Question 1
Question 1.1. Approximate the bootstrap standard error and bias of the estimated population coefficient of variation
# Question 1 sample, together with the resampling sizes reused further down:
# B first-level bootstrap replicates and R second-level replicates.
datset1 <- c(10.1, 14.8, 5.0, 10.2, 12.4, 12.2, 2.0, 11.5, 17.8, 4.0)
n <- length(datset1)
B <- 1000
R <- 100

# Statistic of interest: standard deviation divided by the absolute mean.
cv_of <- function(x) sd(x) / abs(mean(x))

# Evaluate the statistic on B resamples of size n drawn with replacement.
stats <- replicate(B, cv_of(sample(datset1, n, replace = TRUE)))

# Bootstrap standard error: spread of the replicates.
std <- sd(stats)
# Bootstrap bias: mean of the replicates minus the full-sample statistic.
bias <- mean(stats) - cv_of(datset1)
Answer: The approximation for the bootstrap standard error is 0.1223 and for the bias is -0.0242
Question 1.2. Approximating the bootstrap standard error of the estimated bootstrap standard error of the estimated coefficient of variation
# Double bootstrap. For each of the B first-level resamples, R second-level
# resamples are drawn from it and the standard deviation of their
# sd / |mean| values is stored; std2 is the standard deviation of those B
# stored values.
stats <- numeric(B)
for (i in seq_len(B)) {
  boot <- sample(datset1, n, replace = TRUE)
  stats2 <- vapply(
    seq_len(R),
    function(j) {
      boot2 <- sample(boot, n, replace = TRUE)
      sd(boot2) / abs(mean(boot2))
    },
    numeric(1)
  )
  stats[i] <- sd(stats2)
}
std2 <- sd(stats)
Answer: The approximate value for the bootstrap standard error of the estimated bootstrap standard error of the coefficient of variation is 0.0275
Question 2
Question 2.1. Constructing a 90% confidence interval for the population variance using three different methods
a) Using the basic percentile method
# Type I error level for the interval.
alpha <- 0.1
# Ranks of the order statistics used as the lower ("r") and upper ("s")
# interval endpoints.
r <- floor((B + 1) * alpha / 2)
s <- floor((B + 1) * (1 - alpha / 2))
# Sample variance of each of B resamples drawn with replacement.
stats <- vapply(
  seq_len(B),
  function(i) var(sample(datset1, n, replace = TRUE)),
  numeric(1)
)
# The interval endpoints are the r-th and s-th ordered replicates.
stats_sorted <- sort(stats)
ci_basic <- stats_sorted[c(r, s)]
Answer: The 90% confidence interval using the basic percentile method would be 9.7427, 35.9093
b) Using the bias-corrected (of the basic) percentile method
# Sample variance of each of B resamples drawn with replacement.
stats <- replicate(B, var(sample(datset1, n, replace = TRUE)))
# Proportion of replicates not exceeding the full-sample variance, and the
# standard normal quantile ("z0") at that proportion.
obs_var <- var(datset1)
g <- mean(stats <= obs_var)
z0 <- qnorm(g)
# Ranks "r" and "s": the nominal normal quantiles are shifted by 2 * z0 and
# mapped back to probabilities before scaling by B + 1.
z_crit <- qnorm(1 - alpha / 2)
r <- floor((B + 1) * pnorm(2 * z0 - z_crit))
s <- floor((B + 1) * pnorm(2 * z0 + z_crit))
stats.sorted <- sort(stats)
ci_basic_BC <- stats.sorted[c(r, s)]
Answer: The 90% confidence interval using the B-C method would be 14.3672, 43.5093
c) Using the bootstrap-t method
# Bootstrap-t (studentized) interval for the variance.
# Redefining "r" and "s": the upper-tail quantile of the studentized
# statistic produces the lower endpoint and the lower-tail quantile the upper
# endpoint, so "r" is the upper rank and "s" the lower rank here.
r <- floor((B + 1) * (1 - (alpha / 2)))
s <- floor((B + 1) * alpha / 2)
# Full-sample estimate that the interval is centred on.
var_hat <- var(datset1)
# Doing the double bootstrap
Tstats <- rep(0, B)
# First-level variance replicates, kept so that the standard error of the
# full-sample variance can be estimated from them afterwards.
boot_vars <- rep(0, B)
for (i in seq_len(B)) {
  boot <- sample(datset1, n, replace = TRUE)
  boot_vars[i] <- var(boot)
  # Second-level resamples estimate the standard error of var(boot).
  stats2 <- rep(0, R)
  for (j in seq_len(R)) {
    boot2 <- sample(boot, n, replace = TRUE)
    stats2[j] <- var(boot2)
  }
  se_boot <- sd(stats2)
  # Studentized replicate: centred at the full-sample variance and scaled by
  # this resample's own standard-error estimate.
  Tstats[i] <- (boot_vars[i] - var_hat) / se_boot
}
Tstats_sorted <- sort(Tstats)
# The quantiles of T must be scaled by the standard error of the variance
# estimate (bootstrap estimate: spread of the first-level replicates), not by
# the standard deviation of the raw observations.
se_hat <- sd(boot_vars)
ci_t <- c(
  var_hat - Tstats_sorted[r] * se_hat,
  var_hat - Tstats_sorted[s] * se_hat
)
Answer: The 90% confidence interval using the bootstrap-t method would be 18.6482, 46.2356
Question 2.2: Constructing a 95% basic percentile confidence interval for the population third quartile using the parametric bootstrap.
# Redefining alpha, "r", "s" and the rank used for the third quartile.
# alpha must be assigned before "r" and "s": they are computed from it, and
# computing them first would silently reuse the previous section's alpha.
alpha <- 0.05
r <- floor((B + 1) * (alpha / 2))
s <- floor((B + 1) * (1 - (alpha / 2)))
# Rank of the order statistic taken as the third quartile of a sample of
# size n (rank 7 when n is 10, the length of datset1).
q3 <- floor(n * 0.75)
a) Assuming a normal distribution
# Parametric bootstrap: simulate B normal samples of size n whose mean and
# standard deviation are those of datset1, and keep the q3-th smallest value
# of each simulated sample.
mu_hat <- mean(datset1)
sigma_hat <- sd(datset1)
stats <- replicate(
  B,
  sort(rnorm(n = n, mean = mu_hat, sd = sigma_hat))[q3]
)
# Interval endpoints are the r-th and s-th ordered replicates.
stats_sorted <- sort(stats)
ci_norm <- stats_sorted[c(r, s)]
Answer: The 95% confidence interval of the population third quartile, assuming normal distribution is 8.9315, 15.0377
Question 3
# Question 3 data: response y and the two predictors x1 and x2, seven
# observations each.
y <- c(3300, 3600, 4000, 3500, 3900, 3600, 3800)
x1 <- c(2.6, 3.4, 3.6, 3.2, 3.5, 2.9, 3.3)
x2 <- c(5, 4, 10, 5, 6, 7, 8)
Question 3.1. Using the non-parametric bootstrap, testing hypothesis of beta2 = 30 at 10% significance
# Least-squares fit of y on x1 and x2; the summary supplies standard errors.
fit <- lm(y ~ x1 + x2)
sumfit <- summary(fit)
# Residuals of the fit, centred at zero, form the resampling pool.
res <- fit$residuals
res0 <- res - mean(res)
# Value of the x2 coefficient under the null hypothesis.
beta2_null <- 30
# Observed statistic: (estimate - null value) / standard error, x2 row.
t_stat <- (coef(fit)[3] - beta2_null) / coef(sumfit)[3, 2]
# Sample size and number of bootstrap replicates.
n <- length(x1)
B <- 1000
# Systematic part of the bootstrap response: fitted intercept and x1 slope,
# with the x2 slope set to its null value. Only the resampled residuals
# differ between replicates.
null_mean <- coef(fit)[1] + coef(fit)[2] * x1 + beta2_null * x2
stats <- numeric(B)
for (i in seq_len(B)) {
  vstars <- null_mean + sample(res0, n, replace = TRUE)
  boot_fit <- lm(vstars ~ x1 + x2)
  se_beta2 <- coef(summary(boot_fit))[3, 2]
  stats[i] <- (coef(boot_fit)[3] - beta2_null) / se_beta2
}
stats_sorted <- sort(stats)
# Upper-tail critical value: the floor(B * (1 - alpha))-th ordered replicate.
alpha <- 0.1
s <- floor(B * (1 - alpha))
c_value <- stats_sorted[s]
# Decision by comparing the observed statistic with the critical value.
result <- if (t_stat >= c_value) {
  "reject the null hypothesis"
} else {
  "fail to reject the null hypothesis"
}
# Bootstrap p-value: share of replicates at least as large as the observed
# statistic, and the decision it implies.
p_val <- mean(stats >= t_stat)
p_result <- if (p_val <= alpha) {
  "reject the null hypothesis"
} else {
  "failure to reject the null hypothesis"
}
Answer: Given the bootstrap estimate of the critical value, 1.597, and the observed test statistic of 1.4264, we fail to reject the null hypothesis at the 10% significance level. The bootstrap p-value of 0.119 exceeds the alpha of 0.1, which confirms the failure to reject the null hypothesis.
Question 3.2. Determining a 90% confidence interval for beta1, using the hybrid method and assuming heteroscedasticity.
# Wild bootstrap: each centred residual keeps its own position and is
# multiplied by a sign drawn at random from {1, -1}, one draw per
# observation.
fitted_part <- coef(fit)[1] + coef(fit)[2] * x1 + coef(fit)[3] * x2
res_pool <- unname(res0)
stats <- numeric(B)
for (i in seq_len(B)) {
  signs <- sample(c(1, -1), n, replace = TRUE)
  e_boot <- res_pool * signs
  ystars <- fitted_part + e_boot
  boot_fit <- lm(ystars ~ x1 + x2)
  # Keep the refitted x1 slope.
  stats[i] <- coef(boot_fit)[2]
}
# Ordered replicates and the ranks for the hybrid method: the upper rank
# "r" yields the lower endpoint and the lower rank "s" the upper endpoint.
stats_sorted <- sort(stats)
alpha <- 0.1
r <- floor(B * (1 - (alpha / 2)))
s <- floor(B * (alpha / 2))
# Hybrid interval: twice the fitted x1 slope minus the ordered replicates.
# Note that this variable shares its name with the function stats::confint().
twice_b1 <- 2 * coef(fit)[2]
confint <- c(twice_b1 - stats_sorted[r], twice_b1 - stats_sorted[s])
Answer: The 90% confidence interval for beta1 assuming heteroscedasticity and using the hybrid method is: 372.327, 568.9212
Question 4
Testing at a 5% significance level whether the population mean value of houses with four bedrooms is more than the population mean value of houses with three bedrooms.
# Load the housing data from a whitespace-delimited file with a header row.
datset3 <- read.table("data.set3.txt", header = TRUE)
# First column of the rows with three and with four bedrooms
# (presumably the house value; confirm against the data file).
is_3bed <- datset3$bed == 3
is_4bed <- datset3$bed == 4
set.3bed <- datset3[is_3bed, 1]
set.4bed <- datset3[is_4bed, 1]
# Group sizes, significance level, number of replicates and the rank of the
# ordered replicate used as the upper-tail critical value.
n <- length(set.3bed)
m <- length(set.4bed)
alpha <- 0.05
B <- 1000
s <- floor(B * (1 - alpha))
# Centre each group at zero so that resampling happens under equal means.
v3 <- set.3bed - mean(set.3bed)
v4 <- set.4bed - mean(set.4bed)
# Difference in means divided by its unpooled standard error.
unpooled_t <- function(four, three) {
  (mean(four) - mean(three)) /
    sqrt(var(four) / length(four) + var(three) / length(three))
}
# Observed statistic on the original groups.
t_obs <- unpooled_t(set.4bed, set.3bed)
# Bootstrap distribution of the statistic from the centred groups; the
# three-bedroom resample is drawn first, then the four-bedroom one.
t_stats <- numeric(B)
for (i in seq_len(B)) {
  boot_v3 <- sample(v3, n, replace = TRUE)
  boot_v4 <- sample(v4, m, replace = TRUE)
  t_stats[i] <- unpooled_t(boot_v4, boot_v3)
}
# Critical value: the s-th ordered replicate.
t_stats_sorted <- sort(t_stats)
c_val <- t_stats_sorted[s]
# Decision by comparing the observed statistic with the critical value.
result <- if (t_obs >= c_val) {
  "reject the null hypothesis"
} else {
  "fail to reject the null hypothesis"
}
# Bootstrap p-value: share of replicates at least as large as the observed
# statistic, and the decision it implies.
p_value <- mean(t_stats >= t_obs)
p_result <- if (p_value <= alpha) {
  "rejection of the null hypothesis"
} else {
  "failure to reject the null hypothesis"
}
Answer: Given a critical value of 1.4845 and an observed test statistic of 2.4273 we need to reject the null hypothesis. And, given a p-value of 0.004 against an alpha of 0.05, the rejection of the null hypothesis can be confirmed.
END