# Evaluate ((n - 1) / n)^n element-wise for a numeric vector `n`.
#
# For a bootstrap sample of size n drawn with replacement from n observations,
# (n - 1) / n is the chance that a single draw misses one particular
# observation, so the n-th power is the chance that every draw misses it.
# The complement, 1 - BootstrapProbability(n), is the inclusion probability.
#
# n: numeric vector of sample sizes.
# Returns: numeric vector of the same length as `n`.
BootstrapProbability <- function(n) {
  miss_one_draw <- (n - 1) / n
  miss_one_draw^n
}
# Scatter plot of BootstrapProbability(n) against n for n = 1, ..., 100000.
# The arguments are left as inline expressions: plot() derives its default
# axis labels from them.
plot(1:100000, BootstrapProbability(1:100000))
The probability appears to converge as n grows large: ((n - 1) / n)^n approaches 1/e ≈ 0.368.
# Monte Carlo estimate of the probability that observation 4 appears in a
# bootstrap sample drawn with replacement from the observations 1..100.
# Each of the 10000 repetitions records whether the value 4 was drawn at
# least once; the mean of those indicators is the estimated probability.
store <- logical(10000)
for (i in seq_along(store)) {
  # `replace` is written out in full; the abbreviation `rep` only worked
  # through partial argument matching.
  store[i] <- any(sample(1:100, replace = TRUE) == 4)
}
mean(store)
## [1] 0.6406
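The simulated proportion (about 0.64) agrees with subproblem g: for n = 100, the probability that a given observation is in the bootstrap sample is 1 - (99/100)^100 ≈ 0.634.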
library(ISLR)
# Logistic regression of `default` on `income` and `balance`, fitted to every
# row of `Default`, followed by the coefficient table and fit statistics.
glm.fit <- glm(
  formula = default ~ income + balance,
  family = binomial,
  data = Default
)
summary(glm.fit)
##
## Call:
## glm(formula = default ~ income + balance, family = binomial,
## data = Default)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4725 -0.1444 -0.0574 -0.0211 3.7245
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.154e+01 4.348e-01 -26.545 < 2e-16 ***
## income 2.081e-05 4.985e-06 4.174 2.99e-05 ***
## balance 5.647e-03 2.274e-04 24.836 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 1579.0 on 9997 degrees of freedom
## AIC: 1585
##
## Number of Fisher Scoring iterations: 8
# Validation-set estimate of the accuracy of the logistic model
# default ~ income + balance.
#
# A random 80% of the rows of the global `Default` data frame are used for
# fitting. Each remaining row is labelled "Yes" when its predicted probability
# exceeds 0.5 and "No" otherwise, and the labels are compared with the
# observed `default` column.
#
# Returns: the proportion of held-out rows that were classified correctly.
testAndValidate <- function() {
  n <- nrow(Default)
  # These indices select the rows used for fitting; every other row is held
  # out. `FALSE` is spelled out because `F` is an ordinary, reassignable name.
  train_idx <- sample(n, floor(n * 0.8), replace = FALSE)
  holdout <- Default[-train_idx, ]
  fit <- glm(default ~ income + balance, data = Default[train_idx, ],
             family = binomial)
  predicted <- ifelse(predict(fit, holdout, type = "response") > 0.5,
                      "Yes", "No")
  # One minus the misclassification rate on the held-out rows.
  1 - mean(predicted != holdout$default)
}
# Repeat the validation-set estimate on four independent random splits.
replicate(n = 4, expr = testAndValidate())
## [1] 0.9690 0.9695 0.9710 0.9750
# Validation-set estimate of the accuracy of the logistic model
# default ~ income + balance + student.
#
# A random 80% of the rows of the global `Default` data frame are used for
# fitting. Each remaining row is labelled "Yes" when its predicted probability
# exceeds 0.5 and "No" otherwise, and the labels are compared with the
# observed `default` column.
#
# Returns: the proportion of held-out rows that were classified correctly.
dummyTestAndValidate <- function() {
  n <- nrow(Default)
  # These indices select the rows used for fitting; every other row is held
  # out. `FALSE` is spelled out because `F` is an ordinary, reassignable name.
  train_idx <- sample(n, floor(n * 0.8), replace = FALSE)
  holdout <- Default[-train_idx, ]
  fit <- glm(default ~ income + balance + student,
             data = Default[train_idx, ], family = binomial)
  predicted <- ifelse(predict(fit, holdout, type = "response") > 0.5,
                      "Yes", "No")
  # One minus the misclassification rate on the held-out rows.
  1 - mean(predicted != holdout$default)
}
# Repeat the validation-set estimate, with `student` included as a predictor,
# on four independent random splits.
replicate(n = 4, expr = dummyTestAndValidate())
## [1] 0.9780 0.9745 0.9695 0.9740
Validation accuracy is roughly 97% to 98% in every run, for both models, so adding the student dummy variable makes no noticeable difference.
library(boot)
# Bootstrap the coefficients of the logistic model default ~ income + balance.
# The statistic function refits the model on the rows of `data` selected by
# `index` (passed to glm() as `subset`) and returns the coefficient vector.
# The trailing 100 is the number of bootstrap replicates (it is echoed as
# R = 100 when the result is printed).
# The statistic is kept inline because the printed result echoes the call,
# including the function's source.
boot_out <- boot(Default, function(data, index) return(coef(glm(default ~ income + balance,
data = data, family = binomial, subset = index))), 100)
# Print the original estimates with their bootstrap bias and standard errors.
boot_out
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Default, statistic = function(data, index) return(coef(glm(default ~
## income + balance, data = data, family = binomial, subset = index))),
## R = 100)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* -1.154047e+01 -8.823348e-02 4.541883e-01
## t2* 2.080898e-05 1.133053e-06 5.022541e-06
## t3* 5.647103e-03 3.159101e-05 2.470898e-04