Juan Olazaba 11/13/2023
Problems (a) - (f) are provided on another pdf as they were hand written.
If we would like to get a closer look at what is going with the function, we can scale it down.
# Generate x values
x <- 1:50
# Calculate y values
y <- 1 - (1 - 1/x)^x
# Plot with a logarithmic scale on the y-axis
plot(x, y, log = "y", type = "o", col = "red", lwd = 2, xlab = "x", ylab = "y",
main = "Plot with Logarithmic Y-axis Scale, Scaled down")store <- rep(NA, 10000)
for(i in 1:10000){
store[i] <- sum(sample(1:100, rep=TRUE) == 4) > 0
}
mean(store)## [1] 0.6309
We continue to consider the use of a logistic regression model to predict the probability of default using income and balance on the Default data set. In particular, we will now compute estimates for the standard errors of the income and balance logistic regression coefficients in two different ways: (1) using the bootstrap, and (2) using the standard formula for computing the standard errors in the glm() function. Do not forget to set a random seed before beginning your analysis.
set.seed(35)
data("Default")
lmod <- glm(default ~ balance + income, Default, family = binomial)
summary(lmod)##
## Call:
## glm(formula = default ~ balance + income, family = binomial,
## data = Default)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.154e+01 4.348e-01 -26.545 < 2e-16 ***
## balance 5.647e-03 2.274e-04 24.836 < 2e-16 ***
## income 2.081e-05 4.985e-06 4.174 2.99e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 1579.0 on 9997 degrees of freedom
## AIC: 1585
##
## Number of Fisher Scoring iterations: 8
boot.fn <- function(data, index = 1:nrow(data)) {
coef(glm(default ~ income + balance, data = data, subset = index, family = "binomial"))[-1]
}
boot.fn(Default)## income balance
## 2.080898e-05 5.647103e-03
Multile Logistic Regression Method - Balance: 2.274e-04 Income: 4.985e-06
Bootstrap Method - Balance: 4.940896e-06 Income: 2.390732e-04
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Default, statistic = boot.fn, R = 500)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 2.080898e-05 3.682295e-07 4.940896e-06
## t2* 5.647103e-03 1.702938e-05 2.390732e-04
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 17.02 21.20 22.53 25.00 50.00
## [1] 0.4088611
stat.fun = function(df,index){
mean.vec = apply(df,2,function(x)mean(x[index],na.rm = T))
return(c(mean.vec))}
set.seed(15)
boot.t = boot(Boston,stat.fun,R=2000)
print(boot.t)##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Boston, statistic = stat.fun, R = 2000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 3.61352356 3.540377e-04 0.373513692
## t2* 11.36363636 3.088093e-02 1.041427205
## t3* 11.13677866 -4.305464e-03 0.307916069
## t4* 0.06916996 -4.693676e-04 0.011341629
## t5* 0.55469506 4.302223e-05 0.005092919
## t6* 6.28463439 -1.015988e-04 0.030178170
## t7* 68.57490119 6.640316e-04 1.247783734
## t8* 3.79504269 -1.678561e-04 0.092063532
## t9* 9.54940711 -3.695652e-04 0.375297255
## t10* 408.23715415 -1.615958e-01 7.332759325
## t11* 18.45553360 -2.371640e-03 0.095424334
## t12* 356.67403162 -1.122536e-01 4.077679584
## t13* 12.65306324 2.627194e-03 0.317235348
## t14* 22.53280632 1.652470e-03 0.404867237
plot <- table(Boston$medv)
barplot(plot, main = "Count of Unique Values", xlab = "Unique Median Values of Owner Occupied homes ($1000s)", ylab = "Count")bootstrap 95% CI (21.723, 23.342)
t.test 95% CI (21.72953, 23.33608)
## [1] 21.72307
## [1] 23.34254
##
## One Sample t-test
##
## data: Boston$medv
## t = 55.111, df = 505, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 21.72953 23.33608
## sample estimates:
## mean of x
## 22.53281
Answer: ˆμmed = 21.20
stat.fun = function(df,index){
median.vec = apply(df,2,function(x)median(x[index],na.rm = T))
var.vec = apply(df,2,function(x)sd(x[index],na.rm = T)^2/ length(x[index][!is.na(x[index])]))
T = (median.vec[1] - median.vec[2])/sqrt(sum(var.vec))
return(c(median.vec,T))}
set.seed(15)
boot.t = boot(Boston,stat.fun,R=2000)
print(boot.t)##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Boston, statistic = stat.fun, R = 2000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 0.25651000 0.0064894025 0.037635143
## t2* 0.00000000 0.0000000000 0.000000000
## t3* 9.69000000 -0.4044700000 0.692132207
## t4* 0.00000000 0.0000000000 0.000000000
## t5* 0.53800000 -0.0039457500 0.006643945
## t6* 6.20850000 -0.0046225000 0.029265692
## t7* 77.50000000 0.0071500000 2.485400870
## t8* 3.20745000 -0.0104814500 0.158145555
## t9* 5.00000000 0.0000000000 0.000000000
## t10* 330.00000000 3.2750000000 13.901546731
## t11* 19.05000000 -0.0836750000 0.177603536
## t12* 391.44000000 0.0827200000 0.610163709
## t13* 11.36000000 -0.0440525000 0.521005915
## t14* 21.20000000 -0.0056250000 0.376185283
## t15* 0.02943828 0.0007468879 0.004048306
## 10% 50% 100%
## 12.75 21.20 50.00
stat.fun = function(df,index){
quantile.vec = apply(df,2,function(x)quantile(x[index],probs = c(0.1), na.rm = T))
var.vec = apply(df,2,function(x)sd(x[index],na.rm = T)^2/ length(x[index][!is.na(x[index])]))
T = (quantile.vec[1]-quantile.vec[2])/sqrt(sum(var.vec))
return(c(quantile.vec,T))}
set.seed(15)
boot.t = boot(Boston,stat.fun,R=1000)
print(boot.t)##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Boston, statistic = stat.fun, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 3.819500e-02 7.014050e-04 3.136970e-03
## t2* 0.000000e+00 0.000000e+00 0.000000e+00
## t3* 2.910000e+00 1.397000e-02 2.691447e-01
## t4* 0.000000e+00 0.000000e+00 0.000000e+00
## t5* 4.270000e-01 -2.596800e-03 5.474128e-03
## t6* 5.593500e+00 -1.237500e-02 4.714114e-02
## t7* 2.695000e+01 -9.581000e-01 2.865858e+00
## t8* 1.628300e+00 1.306775e-02 5.173702e-02
## t9* 3.000000e+00 -1.440000e-01 3.396754e-01
## t10* 2.330000e+02 2.016000e+00 6.549553e+00
## t11* 1.475000e+01 7.320000e-02 1.667320e-01
## t12* 2.902700e+02 -1.050234e+01 3.236495e+01
## t13* 4.680000e+00 1.274500e-02 1.851796e-01
## t14* 1.275000e+01 1.785000e-02 5.111557e-01
## t15* 4.383436e-03 8.312096e-05 3.619368e-04