Q6) a)
library (ISLR)
set.seed(10)
mod_log <- glm(default ~ income + balance, data = Default, family = "binomial")
summary(mod_log)$coefficients[, 2]
## (Intercept) income balance
## 4.347564e-01 4.985167e-06 2.273731e-04
b)
boot.fn <- function(data, index = 1:nrow(data)) {
coef(glm(default ~ income + balance, data = data, subset = index, family = "binomial"))[-1]
}
boot.fn(Default)
## income balance
## 2.080898e-05 5.647103e-03
c) Estimate the std error of coeffiecents of logistic
regressoion
set.seed(101)
library (boot)
boot_est= boot(data = Default, statistic = boot.fn, R=100)
boot_est
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Default, statistic = boot.fn, R = 100)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 2.080898e-05 8.890686e-07 4.623026e-06
## t2* 5.647103e-03 4.102763e-05 2.215951e-04
d)
Std. Error with Boot function and glm method
sapply(data.frame(income = boot_est$t[ ,1], balance = boot_est$t[ ,2]), sd)
## income balance
## 4.623026e-06 2.215951e-04
summary(mod_log)$coefficients[2:3, 2]
## income balance
## 4.985167e-06 2.273731e-04
9)
library (MASS)
summary(Boston)
## crim zn indus chas
## Min. : 0.00632 Min. : 0.00 Min. : 0.46 Min. :0.00000
## 1st Qu.: 0.08205 1st Qu.: 0.00 1st Qu.: 5.19 1st Qu.:0.00000
## Median : 0.25651 Median : 0.00 Median : 9.69 Median :0.00000
## Mean : 3.61352 Mean : 11.36 Mean :11.14 Mean :0.06917
## 3rd Qu.: 3.67708 3rd Qu.: 12.50 3rd Qu.:18.10 3rd Qu.:0.00000
## Max. :88.97620 Max. :100.00 Max. :27.74 Max. :1.00000
## nox rm age dis
## Min. :0.3850 Min. :3.561 Min. : 2.90 Min. : 1.130
## 1st Qu.:0.4490 1st Qu.:5.886 1st Qu.: 45.02 1st Qu.: 2.100
## Median :0.5380 Median :6.208 Median : 77.50 Median : 3.207
## Mean :0.5547 Mean :6.285 Mean : 68.57 Mean : 3.795
## 3rd Qu.:0.6240 3rd Qu.:6.623 3rd Qu.: 94.08 3rd Qu.: 5.188
## Max. :0.8710 Max. :8.780 Max. :100.00 Max. :12.127
## rad tax ptratio black
## Min. : 1.000 Min. :187.0 Min. :12.60 Min. : 0.32
## 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40 1st Qu.:375.38
## Median : 5.000 Median :330.0 Median :19.05 Median :391.44
## Mean : 9.549 Mean :408.2 Mean :18.46 Mean :356.67
## 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20 3rd Qu.:396.23
## Max. :24.000 Max. :711.0 Max. :22.00 Max. :396.90
## lstat medv
## Min. : 1.73 Min. : 5.00
## 1st Qu.: 6.95 1st Qu.:17.02
## Median :11.36 Median :21.20
## Mean :12.65 Mean :22.53
## 3rd Qu.:16.95 3rd Qu.:25.00
## Max. :37.97 Max. :50.00
a)
mean(Boston$medv)
## [1] 22.53281
b) Estimate of Std. Error of mean of medv
sd(Boston$medv) / sqrt(length(Boston$medv))
## [1] 0.4088611
c) Std. Error of mean usig boot strap
boot.fn <- function(vector, index) {
mean(vector[index])
}
set.seed(123)
(boot_results <- boot(data = Boston$medv, statistic = boot.fn, R = 1000))
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Boston$medv, statistic = boot.fn, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 22.53281 -0.01607372 0.4045557
d)
CI of Mean of medv
boot_results_SE <- sd(boot_results$t)
round(c(mean(Boston$medv) - 2*boot_results_SE, mean(Boston$medv) + 2*boot_results_SE), 4)
## [1] 21.7237 23.3419
##
t.test(Boston$medv)
##
## One Sample t-test
##
## data: Boston$medv
## t = 55.111, df = 505, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 21.72953 23.33608
## sample estimates:
## mean of x
## 22.53281
g) tenth percentile of medv
quantile(Boston$medv, 0.1)
## 10%
## 12.75