In-Class Exercise 1:
The original function
ci_norm <- function(n=100, nci=50, nRun=3, level=0.95, pause=0.05) {
ci <- NULL
z0 = qnorm(1 - (1 - level)/2)
for ( j in 1:nRun ) {
zbar <- colMeans(replicate(nci, rnorm(n)))
zl <- zbar - z0 * 1/sqrt(n); zu <- zbar + z0 * 1/sqrt(n)
plot(1, xlim = c(0.5, nci + 0.5), ylim = c(min(zl), max(zu)),
type = "n", xlab = "Sample ID", ylab = "Average")
abline(h = 0, lty = 2)
for( i in 1:nci ) {
arrows(i, zl[i], i, zu[i], length = 0.05, angle = 90,
code = 3,
col = ifelse(0 > zl[i] & 0 < zu[i], "gray", "red"))
points(i, zbar[i], pch = 19, col = "gray")
Sys.sleep(pause)
}
Sys.sleep(3)
}
}The revised function (referred from Jay-Liao )
The function for each ci, if ci including 0 represent as gray, if not represent as red
ci_norm_ci <- function(i, zbar, zl, zu, pause) {
arrows(i, zl[i], i, zu[i], length = 0.05, angle = 90, code = 3,
col = ifelse(0 > zl[i] & 0 < zu[i], "gray", "red"))
points(i, zbar[i], pch = 19, col = "gray")
Sys.sleep(pause)
}ci_norm_r <- function(n=100, nci=50, nRun=3, level=0.95, pause=0.05, seeds=sample(1:100, 1), file_name='test') {
set.seed(seeds)
ci <- NULL
z0 <- qnorm(1 - (1 - level)/2)
lapply(1:nRun, function(r) {
zbar <- colMeans(replicate(nci, rnorm(n)))
zl <- zbar - z0 * 1/sqrt(n)
zu <- zbar + z0 * 1/sqrt(n)
plot(1, xlim = c(0.5, nci + 0.5), ylim = c(min(zl), max(zu) + .1), type = "n",
xlab = "Sample ID", ylab = "Average")
text(5, max(zu) + .05, paste0('Running iteration = ', r))
abline(h = 0, lty = 2)
lapply(1:nci, function(i) ci_norm_ci(i=i, zbar=zbar, zl=zl, zu=zu, pause=pause))
})
}In-Class Exercise 2: The correlation method
Load data file
## id female race ses schtyp prog read write math science socst
## 1 70 male white low public general 57 52 41 47 57
## 2 121 female white middle public vocation 68 59 53 63 61
## 3 86 male white high public general 44 33 54 58 31
## 4 141 male white high public vocation 63 44 47 53 56
## 5 172 male white middle public academic 47 52 57 53 61
## 6 113 male white middle public academic 44 52 51 63 61
The original function
cnt <- 0
nIter <- 1001
for (i in 1:nIter) {
new <- sample(dta.asian$read)
r <- cor(new, dta.asian$math)
if ( r0 <= r ) cnt <- cnt+1
}
cnt/nIter## [1] 0.04195804
newread <- replicate(nIter, sample(dta.asian$read))
newread <- data.frame(unlist(newread))
with(newread, lapply(names(newread), function(x)
cor(dta.asian$math, eval(substitute(tmp, list(tmp=as.name(x)))))))##
## Pearson's product-moment correlation
##
## data: dta[dta$race == "asian", "math"] and dta[dta$race == "asian", "socst"]
## t = 1.9887, df = 9, p-value = 0.07796
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.07083501 0.86552255
## sample estimates:
## cor
## 0.5525177
The revised version, use sapply function and self-defined function to excecute the generation.
cor_test <- function(var1, var2, nIter=1000) {
cnt <- 0
r0 <- cor(var1,var2)
sapply(1:nIter, function(i) {
new <- sample(var2)
r <- cor(new, var1)
return(r0 <= r)
})
}
mean(cor_test(var1=dta.asian$math, dta.asian$read))## [1] 0.036
In-Class Exercise 3: The Bootstrap Estimates of Coefficients for a Multiple Linear Regression Model
Load data file
## Treat Prewt Postwt
## CBT :29 Min. :70.00 Min. : 71.30
## Cont:26 1st Qu.:79.60 1st Qu.: 79.33
## FT :17 Median :82.30 Median : 84.05
## Mean :82.41 Mean : 85.17
## 3rd Qu.:86.00 3rd Qu.: 91.55
## Max. :94.90 Max. :103.60
## [1] 72 3
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15"
## [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30"
## [31] "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44" "45"
## [46] "46" "47" "48" "49" "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" "60"
## [61] "61" "62" "63" "64" "65" "66" "67" "68" "69" "70" "71" "72"
## Conduct linear regression
anrx_lm <- lm(Postwt ~ Prewt + Treat, data=dta)
## Show the results of linear model
summary(anrx_lm)##
## Call:
## lm(formula = Postwt ~ Prewt + Treat, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.1083 -4.2773 -0.5484 5.4838 15.2922
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.7711 13.3910 3.717 0.00041 ***
## Prewt 0.4345 0.1612 2.695 0.00885 **
## TreatCont -4.0971 1.8935 -2.164 0.03400 *
## TreatFT 4.5631 2.1333 2.139 0.03604 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.978 on 68 degrees of freedom
## Multiple R-squared: 0.2777, Adjusted R-squared: 0.2458
## F-statistic: 8.713 on 3 and 68 DF, p-value: 5.719e-05
## (Intercept) Prewt TreatCont TreatFT
## 49.7711090 0.4344612 -4.0970655 4.5630627
## Create a function to generate the bootstrap analysis
bootstrap_function <- function(string_formula, nc, model_data, ndraws) {
coeff_mtx <- matrix(0, nrow = ndraws, ncol = nc)
for (i in 1:ndraws) {
bootstrap_ids <- sample(seq(nrow(model_data)), nrow(model_data), replace = TRUE)
bootstrap_data <- model_data[bootstrap_ids,]
bootstrap_model <- lm(as.formula(string_formula), bootstrap_data)
coeff_mtx[i,] <- bootstrap_model$coefficients
if (i == 1) {
print(bootstrap_model$coefficients)
}
}
return(coeff_mtx)
}
## Run the function
bootstrap_estimates <- bootstrap_function("Postwt ~ Prewt + Treat", nc=4, dta, 500)## (Intercept) Prewt TreatCont TreatFT
## 66.5615835 0.2434191 -5.6856067 7.6875980
## For bootstrap CIs
bootstrap_CIs <- matrix(0, nrow = 4, ncol = 2)
for (i in 1:4) {
bootstrap_CIs[i,1] <- quantile(bootstrap_estimates[,i], 0.025)
bootstrap_CIs[i,2] <- quantile(bootstrap_estimates[,i], 0.975)
}
## Show the CIs
print(bootstrap_CIs)## [,1] [,2]
## [1,] 15.07047391 77.8825696
## [2,] 0.08962021 0.8445048
## [3,] -7.63200616 -0.4699275
## [4,] -0.20201536 8.8607783
## 2.5 % 97.5 %
## (Intercept) 23.0498681 76.4923499
## Prewt 0.1128268 0.7560955
## TreatCont -7.8754712 -0.3186599
## TreatFT 0.3060571 8.8200682
## Display the histogram of the bootstraping results
par(mfrow=c(2,2))
hist(bootstrap_estimates[,1])
hist(bootstrap_estimates[,2])
hist(bootstrap_estimates[,3])
hist(bootstrap_estimates[,4])The revised version, for bootstrapping formula
The revised version, for bootstrapping CIs
The revised version, for generating the bootstrapping results
bootstrap_function_r <- function(string_formula, model_data, ndraws=1000, CI=FALSE) {
coeff_mtx <- t(sapply(1:ndraws, function(i) bootstrap_function_formula(string_formula, model_data)))
if (CI) {return(bootstrap_CIs_r(coeff_mtx=coeff_mtx, level=level))}
else {return(coeff_mtx)}
}
bootstrap_estimates <- bootstrap_function_r(string_formula="Postwt ~ Prewt + Treat",
model_data=dta, ndraws=500)
head(bootstrap_estimates)## (Intercept) Prewt TreatCont TreatFT
## [1,] 35.28652 0.6182613 -3.933824 2.960395
## [2,] 49.95680 0.4323503 -5.029876 2.635078
## [3,] 41.74531 0.5301579 -3.964060 1.727063
## [4,] 15.37227 0.8354118 -1.465516 3.330165
## [5,] 32.03421 0.6777810 -6.836423 3.326761
## [6,] 67.27215 0.1990483 -1.720711 9.259112
Display the histogram of the bootstraping results
par(mfrow=c(2,2))
hist(bootstrap_estimates[,1])
hist(bootstrap_estimates[,2])
hist(bootstrap_estimates[,3])
hist(bootstrap_estimates[,4])