Q6) a)

library (ISLR)
set.seed(10)

mod_log <- glm(default ~ income + balance, data = Default, family = "binomial")

summary(mod_log)$coefficients[, 2]

##  (Intercept)       income      balance 
## 4.347564e-01 4.985167e-06 2.273731e-04

b)

boot.fn <- function(data, index = 1:nrow(data)) {
  coef(glm(default ~ income + balance, data = data, subset = index, family = "binomial"))[-1]
}

boot.fn(Default)

##       income      balance 
## 2.080898e-05 5.647103e-03

c) Estimate the std error of coeffiecents of logistic regressoion

set.seed(101)
library (boot)

boot_est= boot(data = Default, statistic = boot.fn, R=100)
boot_est

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = Default, statistic = boot.fn, R = 100)
## 
## 
## Bootstrap Statistics :
##         original       bias     std. error
## t1* 2.080898e-05 8.890686e-07 4.623026e-06
## t2* 5.647103e-03 4.102763e-05 2.215951e-04

d)

Std. Error with Boot function and glm method

sapply(data.frame(income = boot_est$t[ ,1], balance = boot_est$t[ ,2]), sd)

##       income      balance 
## 4.623026e-06 2.215951e-04

summary(mod_log)$coefficients[2:3, 2]

##       income      balance 
## 4.985167e-06 2.273731e-04

9)

library (MASS)

summary(Boston)

##       crim                zn             indus            chas        
##  Min.   : 0.00632   Min.   :  0.00   Min.   : 0.46   Min.   :0.00000  
##  1st Qu.: 0.08205   1st Qu.:  0.00   1st Qu.: 5.19   1st Qu.:0.00000  
##  Median : 0.25651   Median :  0.00   Median : 9.69   Median :0.00000  
##  Mean   : 3.61352   Mean   : 11.36   Mean   :11.14   Mean   :0.06917  
##  3rd Qu.: 3.67708   3rd Qu.: 12.50   3rd Qu.:18.10   3rd Qu.:0.00000  
##  Max.   :88.97620   Max.   :100.00   Max.   :27.74   Max.   :1.00000  
##       nox               rm             age              dis        
##  Min.   :0.3850   Min.   :3.561   Min.   :  2.90   Min.   : 1.130  
##  1st Qu.:0.4490   1st Qu.:5.886   1st Qu.: 45.02   1st Qu.: 2.100  
##  Median :0.5380   Median :6.208   Median : 77.50   Median : 3.207  
##  Mean   :0.5547   Mean   :6.285   Mean   : 68.57   Mean   : 3.795  
##  3rd Qu.:0.6240   3rd Qu.:6.623   3rd Qu.: 94.08   3rd Qu.: 5.188  
##  Max.   :0.8710   Max.   :8.780   Max.   :100.00   Max.   :12.127  
##       rad              tax           ptratio          black       
##  Min.   : 1.000   Min.   :187.0   Min.   :12.60   Min.   :  0.32  
##  1st Qu.: 4.000   1st Qu.:279.0   1st Qu.:17.40   1st Qu.:375.38  
##  Median : 5.000   Median :330.0   Median :19.05   Median :391.44  
##  Mean   : 9.549   Mean   :408.2   Mean   :18.46   Mean   :356.67  
##  3rd Qu.:24.000   3rd Qu.:666.0   3rd Qu.:20.20   3rd Qu.:396.23  
##  Max.   :24.000   Max.   :711.0   Max.   :22.00   Max.   :396.90  
##      lstat            medv      
##  Min.   : 1.73   Min.   : 5.00  
##  1st Qu.: 6.95   1st Qu.:17.02  
##  Median :11.36   Median :21.20  
##  Mean   :12.65   Mean   :22.53  
##  3rd Qu.:16.95   3rd Qu.:25.00  
##  Max.   :37.97   Max.   :50.00

a)

mean(Boston$medv)

## [1] 22.53281

b) Estimate of Std. Error of mean of medv

sd(Boston$medv) / sqrt(length(Boston$medv))

## [1] 0.4088611

c) Std. Error of mean usig boot strap

boot.fn <- function(vector, index) {
  mean(vector[index])
}

set.seed(123)

(boot_results <- boot(data = Boston$medv, statistic = boot.fn, R = 1000))

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = Boston$medv, statistic = boot.fn, R = 1000)
## 
## 
## Bootstrap Statistics :
##     original      bias    std. error
## t1* 22.53281 -0.01607372   0.4045557

d)

CI of Mean of medv

boot_results_SE <- sd(boot_results$t)
round(c(mean(Boston$medv) - 2*boot_results_SE, mean(Boston$medv) + 2*boot_results_SE), 4)

## [1] 21.7237 23.3419

## 


t.test(Boston$medv)

## 
##  One Sample t-test
## 
## data:  Boston$medv
## t = 55.111, df = 505, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  21.72953 23.33608
## sample estimates:
## mean of x 
##  22.53281

e) provide an estimate, ˆμmed, for the median value of medv in the population

median(Boston$medv)

## [1] 21.2

f) estimate, ˆμmed, for the median value usig Bootstrap

boot.fn <- function(vector, index) {
  median(vector[index])
}

set.seed(77, sample.kind = "Rounding")

## Warning in set.seed(77, sample.kind = "Rounding"): non-uniform 'Rounding'
## sampler used

(boot_results <- boot(data = Boston$medv, statistic = boot.fn, R = 1000))

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = Boston$medv, statistic = boot.fn, R = 1000)
## 
## 
## Bootstrap Statistics :
##     original  bias    std. error
## t1*     21.2  0.0094   0.3700318

g) tenth percentile of medv

quantile(Boston$medv, 0.1)

##   10% 
## 12.75

h) bootstrap to estimate the standard error of ˆμ0.1. Commenton your findings.

boot.fn <- function(vector, index) {
  quantile(vector[index], 0.1)
}

set.seed(77, sample.kind = "Rounding")

## Warning in set.seed(77, sample.kind = "Rounding"): non-uniform 'Rounding'
## sampler used

(boot_results <- boot(data = Boston$medv, statistic = boot.fn, R = 1000))

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = Boston$medv, statistic = boot.fn, R = 1000)
## 
## 
## Bootstrap Statistics :
##     original  bias    std. error
## t1*    12.75 0.02085    0.488873

Bootstrap & Logistic Regression

Shubham Suryawanshi

2022-10-25

Computing estimates for the standard errors (1) using the bootstrap & (2) using the standard formula for computing the standard errors in the glm() function.

Q6) a)

b)

c) Estimate the std error of coeffiecents of logistic regressoion

d)

Std. Error with Boot function and glm method

9)

a)

b) Estimate of Std. Error of mean of medv

c) Std. Error of mean usig boot strap

d)

CI of Mean of medv

e) provide an estimate, ˆμmed, for the median value of medv in the population

f) estimate, ˆμmed, for the median value usig Bootstrap

g) tenth percentile of medv

h) bootstrap to estimate the standard error of ˆμ0.1. Commenton your findings.