Computing estimates for the standard errors (1) using the bootstrap & (2) using the standard formula for computing the standard errors in the glm() function.

Q6) a)

library (ISLR)
set.seed(10)

mod_log <- glm(default ~ income + balance, data = Default, family = "binomial")

summary(mod_log)$coefficients[, 2]
##  (Intercept)       income      balance 
## 4.347564e-01 4.985167e-06 2.273731e-04

b)

boot.fn <- function(data, index = 1:nrow(data)) {
  coef(glm(default ~ income + balance, data = data, subset = index, family = "binomial"))[-1]
}

boot.fn(Default)
##       income      balance 
## 2.080898e-05 5.647103e-03

c) Estimate the std error of coeffiecents of logistic regressoion

set.seed(101)
library (boot)

boot_est= boot(data = Default, statistic = boot.fn, R=100)
boot_est
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = Default, statistic = boot.fn, R = 100)
## 
## 
## Bootstrap Statistics :
##         original       bias     std. error
## t1* 2.080898e-05 8.890686e-07 4.623026e-06
## t2* 5.647103e-03 4.102763e-05 2.215951e-04

d)

Std. Error with Boot function and glm method

sapply(data.frame(income = boot_est$t[ ,1], balance = boot_est$t[ ,2]), sd)
##       income      balance 
## 4.623026e-06 2.215951e-04
summary(mod_log)$coefficients[2:3, 2]
##       income      balance 
## 4.985167e-06 2.273731e-04

9)

library (MASS)

summary(Boston)
##       crim                zn             indus            chas        
##  Min.   : 0.00632   Min.   :  0.00   Min.   : 0.46   Min.   :0.00000  
##  1st Qu.: 0.08205   1st Qu.:  0.00   1st Qu.: 5.19   1st Qu.:0.00000  
##  Median : 0.25651   Median :  0.00   Median : 9.69   Median :0.00000  
##  Mean   : 3.61352   Mean   : 11.36   Mean   :11.14   Mean   :0.06917  
##  3rd Qu.: 3.67708   3rd Qu.: 12.50   3rd Qu.:18.10   3rd Qu.:0.00000  
##  Max.   :88.97620   Max.   :100.00   Max.   :27.74   Max.   :1.00000  
##       nox               rm             age              dis        
##  Min.   :0.3850   Min.   :3.561   Min.   :  2.90   Min.   : 1.130  
##  1st Qu.:0.4490   1st Qu.:5.886   1st Qu.: 45.02   1st Qu.: 2.100  
##  Median :0.5380   Median :6.208   Median : 77.50   Median : 3.207  
##  Mean   :0.5547   Mean   :6.285   Mean   : 68.57   Mean   : 3.795  
##  3rd Qu.:0.6240   3rd Qu.:6.623   3rd Qu.: 94.08   3rd Qu.: 5.188  
##  Max.   :0.8710   Max.   :8.780   Max.   :100.00   Max.   :12.127  
##       rad              tax           ptratio          black       
##  Min.   : 1.000   Min.   :187.0   Min.   :12.60   Min.   :  0.32  
##  1st Qu.: 4.000   1st Qu.:279.0   1st Qu.:17.40   1st Qu.:375.38  
##  Median : 5.000   Median :330.0   Median :19.05   Median :391.44  
##  Mean   : 9.549   Mean   :408.2   Mean   :18.46   Mean   :356.67  
##  3rd Qu.:24.000   3rd Qu.:666.0   3rd Qu.:20.20   3rd Qu.:396.23  
##  Max.   :24.000   Max.   :711.0   Max.   :22.00   Max.   :396.90  
##      lstat            medv      
##  Min.   : 1.73   Min.   : 5.00  
##  1st Qu.: 6.95   1st Qu.:17.02  
##  Median :11.36   Median :21.20  
##  Mean   :12.65   Mean   :22.53  
##  3rd Qu.:16.95   3rd Qu.:25.00  
##  Max.   :37.97   Max.   :50.00

a)

mean(Boston$medv)
## [1] 22.53281

b) Estimate of Std. Error of mean of medv

sd(Boston$medv) / sqrt(length(Boston$medv))
## [1] 0.4088611

c) Std. Error of mean usig boot strap

boot.fn <- function(vector, index) {
  mean(vector[index])
}

set.seed(123)

(boot_results <- boot(data = Boston$medv, statistic = boot.fn, R = 1000))
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = Boston$medv, statistic = boot.fn, R = 1000)
## 
## 
## Bootstrap Statistics :
##     original      bias    std. error
## t1* 22.53281 -0.01607372   0.4045557

d)

CI of Mean of medv

boot_results_SE <- sd(boot_results$t)
round(c(mean(Boston$medv) - 2*boot_results_SE, mean(Boston$medv) + 2*boot_results_SE), 4)
## [1] 21.7237 23.3419
## 


t.test(Boston$medv)
## 
##  One Sample t-test
## 
## data:  Boston$medv
## t = 55.111, df = 505, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  21.72953 23.33608
## sample estimates:
## mean of x 
##  22.53281

e) provide an estimate, ˆμmed, for the median value of medv in the population

median(Boston$medv)
## [1] 21.2

f) estimate, ˆμmed, for the median value usig Bootstrap

boot.fn <- function(vector, index) {
  median(vector[index])
}

set.seed(77, sample.kind = "Rounding")
## Warning in set.seed(77, sample.kind = "Rounding"): non-uniform 'Rounding'
## sampler used
(boot_results <- boot(data = Boston$medv, statistic = boot.fn, R = 1000))
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = Boston$medv, statistic = boot.fn, R = 1000)
## 
## 
## Bootstrap Statistics :
##     original  bias    std. error
## t1*     21.2  0.0094   0.3700318

g) tenth percentile of medv

quantile(Boston$medv, 0.1)
##   10% 
## 12.75

h) bootstrap to estimate the standard error of ˆμ0.1. Commenton your findings.

boot.fn <- function(vector, index) {
  quantile(vector[index], 0.1)
}

set.seed(77, sample.kind = "Rounding")
## Warning in set.seed(77, sample.kind = "Rounding"): non-uniform 'Rounding'
## sampler used
(boot_results <- boot(data = Boston$medv, statistic = boot.fn, R = 1000))
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = Boston$medv, statistic = boot.fn, R = 1000)
## 
## 
## Bootstrap Statistics :
##     original  bias    std. error
## t1*    12.75 0.02085    0.488873