library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.5
data("Default")
summary(Default)
##  default    student        balance         income
##  No :9667   No :7056   Min.   :   0.0   Min.   :  772
##  Yes: 333   Yes:2944   1st Qu.: 481.7   1st Qu.:21340
##                        Median : 823.6   Median :34553
##                        Mean   : 835.4   Mean   :33517
##                        3rd Qu.:1166.3   3rd Qu.:43808
##                        Max.   :2654.3   Max.   :73554
lr1 = glm(default ~ balance + income, data = Default, family = binomial)
summary(lr1)
##
## Call:
## glm(formula = default ~ balance + income, family = binomial,
## data = Default)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4725 -0.1444 -0.0574 -0.0211 3.7245
##
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.154e+01  4.348e-01 -26.545  < 2e-16 ***
## balance      5.647e-03  2.274e-04  24.836  < 2e-16 ***
## income       2.081e-05  4.985e-06   4.174 2.99e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 1579.0 on 9997 degrees of freedom
## AIC: 1585
##
## Number of Fisher Scoring iterations: 8
# Validation-set approach: randomly split the 10,000 observations in half
trainDefault = sample(dim(Default)[1], dim(Default)[1]*0.50)
testDefault = Default[-trainDefault, ]
lr1 = glm(default ~ balance + income, data = Default, family = binomial, subset = trainDefault)
log.prob_def = predict(lr1, testDefault, type = "response")
log.pred_def = rep("No", dim(Default)[1]*0.50)
log.pred_def[log.prob_def > 0.5] = "Yes"
table(log.pred_def, testDefault$default)
##
## log.pred_def   No Yes
##           No 4815 106
##          Yes   21  58
mean(log.pred_def !=testDefault$default)
## [1] 0.0254
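The three splits below repeat exactly these steps with new random training sets, and the final one adds the student dummy. As a rough sketch of the same procedure wrapped in a reusable function (the name val_error and its arguments are made up here and not used elsewhere in this write-up), the repetitions could also be produced with replicate():

val_error <- function(formula, data = Default, threshold = 0.5) {
  train <- sample(nrow(data), nrow(data) * 0.50)   # random 50% training split
  fit <- glm(formula, data = data, family = binomial, subset = train)
  test <- data[-train, ]
  prob <- predict(fit, test, type = "response")    # posterior probability of default
  pred <- ifelse(prob > threshold, "Yes", "No")
  mean(pred != test$default)                       # validation set error rate
}
# e.g. replicate(3, val_error(default ~ balance + income))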
# i. Split the sample set into a training set and a validation set
trainDefault = sample(dim(Default)[1], dim(Default)[1]*0.50)
testDefault = Default[-trainDefault, ]
# ii. Fit a multiple logistic regression model using only the training observations
lr2 = glm(default ~ balance + income, data = Default, family = binomial, subset = trainDefault)
# iii. Predict default status for the validation set and compute the validation set error
log.prob_def = predict(lr2, testDefault, type = "response")
log.pred_def = rep("No", dim(Default)[1]*0.50)
log.pred_def[log.prob_def > 0.5] = "Yes"
table(log.pred_def, testDefault$default)
##
## log.pred_def   No Yes
##           No 4811 120
##          Yes   20  49
mean(log.pred_def !=testDefault$default)
## [1] 0.028
# i. Split the sample set into a training set and a validation set
trainDefault = sample(dim(Default)[1], dim(Default)[1]*0.50)
testDefault = Default[-trainDefault, ]
# ii. Fit a multiple logistic regression model using only the training observations
lr3 = glm(default ~ balance + income, data = Default, family = binomial, subset = trainDefault)
# iii. Predict default status for the validation set and compute the validation set error
log.prob_def = predict(lr3, testDefault, type = "response")
log.pred_def = rep("No", dim(Default)[1]*0.50)
log.pred_def[log.prob_def > 0.5] = "Yes"
table(log.pred_def, testDefault$default)
##
## log.pred_def   No Yes
##           No 4810 109
##          Yes   25  56
mean(log.pred_def !=testDefault$default)
## [1] 0.0268
# i. Split the sample set into a training set and a validation set
trainDefault = sample(dim(Default)[1], dim(Default)[1]*0.50)
testDefault = Default[-trainDefault, ]
# ii. Fit a multiple logistic regression model using only the training observations
lr4 = glm(default ~ balance + income, data = Default, family = binomial, subset = trainDefault)
# iii. Predict default status for the validation set and compute the validation set error
log.prob_def = predict(lr4, testDefault, type = "response")
log.pred_def = rep("No", dim(Default)[1]*0.50)
log.pred_def[log.prob_def > 0.5] = "Yes"
table(log.pred_def, testDefault$default)
##
## log.pred_def   No Yes
##           No 4822 106
##          Yes   24  48
mean(log.pred_def !=testDefault$default)
## [1] 0.026
Across these three additional random splits the validation set error stays between roughly 2.6% and 2.8%, close to the 2.54% from the first split, so the estimate varies only modestly with the particular observations used for training.
# i. Split the sample set into a training set and a validation set
trainDefault = sample(dim(Default)[1], dim(Default)[1]*0.50)
testDefault = Default[-trainDefault, ]
# ii. Fit the logistic regression on the training observations, now including the student dummy variable
lr5 = glm(default ~ balance + income + student, data = Default, family = binomial, subset = trainDefault)
# iii. Predict default status for the validation set and compute the validation set error
log.prob_def = predict(lr5, testDefault, type = "response")
log.pred_def = rep("No", dim(Default)[1]*0.50)
log.pred_def[log.prob_def > 0.5] = "Yes"
table(log.pred_def, testDefault$default)
##
## log.pred_def   No Yes
##           No 4822 100
##          Yes   29  49
mean(log.pred_def !=testDefault$default)
## [1] 0.0258
Including a dummy variable for student gives a validation error of about 2.6%, essentially the same as the models without it, so adding student does not appear to reduce the test error rate.
##### (a) Using the summary() and glm() functions, determine the estimated standard errors for the coefficients associated with income and balance in a multiple logistic regression model that uses both predictors.
lr6 = glm(default ~ balance + income, data = Default, family = binomial)
summary(lr6)
##
## Call:
## glm(formula = default ~ balance + income, family = binomial,
## data = Default)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4725 -0.1444 -0.0574 -0.0211 3.7245
##
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.154e+01  4.348e-01 -26.545  < 2e-16 ***
## balance      5.647e-03  2.274e-04  24.836  < 2e-16 ***
## income       2.081e-05  4.985e-06   4.174 2.99e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 1579.0 on 9997 degrees of freedom
## AIC: 1585
##
## Number of Fisher Scoring iterations: 8
# Bootstrap statistic: coefficient estimates from refitting the logistic regression on the bootstrap sample given by index
boot.fn = function(data, index) return(coef(glm(default ~ balance + income, data = data, family = binomial, subset = index)))
library(boot)
boot(Default, boot.fn, 100)
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Default, statistic = boot.fn, R = 100)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* -1.154047e+01 -3.334939e-02 4.537593e-01
## t2* 5.647103e-03 1.442870e-05 2.248760e-04
## t3* 2.080898e-05 2.447808e-07 5.056124e-06
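For comparison with these bootstrap estimates, the formula-based standard errors can be read straight off the fitted model; summary.glm() stores them in the "Std. Error" column of its coefficient matrix, and they agree closely with the bootstrap values above.

# glm() standard errors for (Intercept), balance and income
summary(lr6)$coefficients[, "Std. Error"]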
library(MASS)
attach(Boston)
mu.hat <- mean(medv)
mu.hat
## [1] 22.53281
stand_error_hat <- sd(medv) / sqrt(dim(Boston)[1])
stand_error_hat
## [1] 0.4088611
set.seed(1)
# Bootstrap statistic: sample mean of medv over the bootstrapped indices
boot.fn <- function(data, index) {
  mu <- mean(data[index])
  return(mu)
}
boot(medv, boot.fn, 1000)
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = medv, statistic = boot.fn, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 22.53281 0.007650791 0.4106622
t.test(medv)
##
## One Sample t-test
##
## data: medv
## t = 55.111, df = 505, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 21.72953 23.33608
## sample estimates:
## mean of x
## 22.53281
CI.mu.hat <- c(22.53 - 2 * 0.4107, 22.53 + 2 * 0.4107)  # mu.hat +/- 2 * bootstrap SE from above
CI.mu.hat
## [1] 21.7086 23.3514
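The bootstrap object for the mean was not saved above, so as a sketch (boot.mean is an assumed name), re-running it and passing the result to boot.ci() from the boot package would give confidence intervals without the hand-rolled two-standard-error rule:

# Hypothetical re-run of the mean bootstrap, saved so boot.ci() can be applied
boot.mean <- boot(medv, boot.fn, 1000)
boot.ci(boot.mean, type = c("norm", "perc"))   # normal and percentile intervals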
median.medv = median(medv)
median.medv
## [1] 21.2
boot.3 = function(data, index) return(median(data[index]))
boot3 = boot(medv, boot.3, 1000)
boot3
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = medv, statistic = boot.3, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 21.2 -0.0386 0.3770241
ten_perc= quantile(medv, c(0.1))
ten_perc
## 10%
## 12.75
boot.4 = function(data, index) return(quantile(data[index], c(0.1)))
boot4 = boot(medv, boot.4, 1000)
boot4
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = medv, statistic = boot.4, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 12.75 0.0186 0.4925766
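The same two-standard-error rule used for the mean can be applied here, plugging in the point estimates and bootstrap standard errors reported above (these lines only restate those numbers; no new results are computed):

# Approximate 95% confidence intervals: estimate +/- 2 * bootstrap standard error
c(21.2 - 2 * 0.3770, 21.2 + 2 * 0.3770)     # for the median of medv
c(12.75 - 2 * 0.4926, 12.75 + 2 * 0.4926)   # for the 10th percentile of medv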