In-Class Exercise 1:

The original function

ci_norm <- function(n=100, nci=50, nRun=3, level=0.95, pause=0.05) {
  ci <- NULL
  z0 = qnorm(1 - (1 - level)/2)
  for ( j in 1:nRun ) {
    zbar <- colMeans(replicate(nci, rnorm(n)))     
    zl <- zbar - z0 * 1/sqrt(n); zu <- zbar + z0 * 1/sqrt(n)
    plot(1, xlim = c(0.5, nci + 0.5), ylim = c(min(zl), max(zu)), 
         type = "n", xlab = "Sample ID", ylab = "Average") 
    abline(h = 0, lty = 2)
    for( i in 1:nci ) {
      arrows(i, zl[i], i, zu[i], length = 0.05, angle = 90, 
             code = 3,
             col = ifelse(0 > zl[i] & 0 < zu[i], "gray", "red"))
      points(i, zbar[i], pch = 19, col = "gray")
      Sys.sleep(pause)
    }
    Sys.sleep(3)
  }
}

Run

ci_norm(n = 36, nci = 51, nRun = 2)

The revised function (referred from Jay-Liao )

The function for each ci, if ci including 0 represent as gray, if not represent as red

ci_norm_ci <- function(i, zbar, zl, zu, pause) {
  arrows(i, zl[i], i, zu[i], length = 0.05, angle = 90, code = 3,
         col = ifelse(0 > zl[i] & 0 < zu[i], "gray", "red"))
  points(i, zbar[i], pch = 19, col = "gray")
  Sys.sleep(pause)
}

ci_norm_r <- function(n=100, nci=50, nRun=3, level=0.95, pause=0.05, seeds=sample(1:100, 1), file_name='test') {
  set.seed(seeds)
  ci <- NULL
  z0 <- qnorm(1 - (1 - level)/2)
  lapply(1:nRun, function(r) {
    zbar <- colMeans(replicate(nci, rnorm(n)))     
    zl <- zbar - z0 * 1/sqrt(n)
    zu <- zbar + z0 * 1/sqrt(n)
    plot(1, xlim = c(0.5, nci + 0.5), ylim = c(min(zl), max(zu) + .1), type = "n",
         xlab = "Sample ID", ylab = "Average")
    text(5, max(zu) + .05, paste0('Running iteration = ', r))
    abline(h = 0, lty = 2)
    lapply(1:nci, function(i) ci_norm_ci(i=i, zbar=zbar, zl=zl, zu=zu, pause=pause))
  })
}

ci_norm_r(n = 50, nci = 41, nRun = 2)

In-Class Exercise 2: The correlation method

Load data file

dta <- read.table("hs0.txt", header = T)
head(dta)

##    id female  race    ses schtyp     prog read write math science socst
## 1  70   male white    low public  general   57    52   41      47    57
## 2 121 female white middle public vocation   68    59   53      63    61
## 3  86   male white   high public  general   44    33   54      58    31
## 4 141   male white   high public vocation   63    44   47      53    56
## 5 172   male white middle public academic   47    52   57      53    61
## 6 113   male white middle public academic   44    52   51      63    61

The original function

dta.asian <- subset(dta, race=="asian")
r0 <- cor(dta.asian$math, dta.asian$socst)

cnt <- 0
nIter <- 1001
for (i in 1:nIter) {
  new <- sample(dta.asian$read)
  r <- cor(new, dta.asian$math)
  if ( r0 <= r ) cnt <- cnt+1
}
cnt/nIter

## [1] 0.04195804

newread <- replicate(nIter, sample(dta.asian$read))
newread <- data.frame(unlist(newread))
with(newread, lapply(names(newread), function(x) 
cor(dta.asian$math, eval(substitute(tmp, list(tmp=as.name(x)))))))

cor.test(dta[dta$race=="asian", "math"], dta[dta$race=="asian", "socst"])

## 
##  Pearson's product-moment correlation
## 
## data:  dta[dta$race == "asian", "math"] and dta[dta$race == "asian", "socst"]
## t = 1.9887, df = 9, p-value = 0.07796
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.07083501  0.86552255
## sample estimates:
##       cor 
## 0.5525177

The revised version, use sapply function and self-defined function to excecute the generation.

cor_test <- function(var1, var2, nIter=1000) {
  cnt <- 0
  r0 <- cor(var1,var2)
  sapply(1:nIter, function(i) {
    new <- sample(var2)
    r <- cor(new, var1)
    return(r0 <= r)
  })
}

mean(cor_test(var1=dta.asian$math, dta.asian$read))

## [1] 0.036

In-Class Exercise 3: The Bootstrap Estimates of Coefficients for a Multiple Linear Regression Model

Load data file

data(anorexia, package="MASS")
dta <- anorexia

## Show the summary table of the data   
summary(dta)

##   Treat        Prewt           Postwt      
##  CBT :29   Min.   :70.00   Min.   : 71.30  
##  Cont:26   1st Qu.:79.60   1st Qu.: 79.33  
##  FT  :17   Median :82.30   Median : 84.05  
##            Mean   :82.41   Mean   : 85.17  
##            3rd Qu.:86.00   3rd Qu.: 91.55  
##            Max.   :94.90   Max.   :103.60

## Show the numbers of observations and variables
dim(dta)

## [1] 72  3

## Show the row names
row.names(dta)

##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14" "15"
## [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30"
## [31] "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44" "45"
## [46] "46" "47" "48" "49" "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" "60"
## [61] "61" "62" "63" "64" "65" "66" "67" "68" "69" "70" "71" "72"

## Conduct linear regression
anrx_lm <- lm(Postwt ~ Prewt + Treat, data=dta)

## Show the results of linear model
summary(anrx_lm)

## 
## Call:
## lm(formula = Postwt ~ Prewt + Treat, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.1083  -4.2773  -0.5484   5.4838  15.2922 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  49.7711    13.3910   3.717  0.00041 ***
## Prewt         0.4345     0.1612   2.695  0.00885 ** 
## TreatCont    -4.0971     1.8935  -2.164  0.03400 *  
## TreatFT       4.5631     2.1333   2.139  0.03604 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.978 on 68 degrees of freedom
## Multiple R-squared:  0.2777, Adjusted R-squared:  0.2458 
## F-statistic: 8.713 on 3 and 68 DF,  p-value: 5.719e-05

## Show the coefficient of the factors
coef(anrx_lm)

## (Intercept)       Prewt   TreatCont     TreatFT 
##  49.7711090   0.4344612  -4.0970655   4.5630627

## Create a function to generate the bootstrap analysis
bootstrap_function  <- function(string_formula, nc, model_data, ndraws) {
  coeff_mtx <- matrix(0, nrow = ndraws,  ncol = nc)    
  for (i in 1:ndraws) {
    bootstrap_ids   <- sample(seq(nrow(model_data)), nrow(model_data), replace = TRUE)
    bootstrap_data <- model_data[bootstrap_ids,]
    bootstrap_model <- lm(as.formula(string_formula), bootstrap_data)
    coeff_mtx[i,]   <- bootstrap_model$coefficients
    if (i == 1) {
      print(bootstrap_model$coefficients)
    }
  }     
  return(coeff_mtx)
}

## Run the function
bootstrap_estimates <- bootstrap_function("Postwt ~ Prewt + Treat", nc=4, dta, 500)

## (Intercept)       Prewt   TreatCont     TreatFT 
##  66.5615835   0.2434191  -5.6856067   7.6875980

## For bootstrap CIs
bootstrap_CIs <- matrix(0, nrow = 4, ncol = 2)
for (i in 1:4) {
  bootstrap_CIs[i,1]  <- quantile(bootstrap_estimates[,i], 0.025)
  bootstrap_CIs[i,2]  <- quantile(bootstrap_estimates[,i], 0.975)
}

## Show the CIs
print(bootstrap_CIs)

##             [,1]       [,2]
## [1,] 15.07047391 77.8825696
## [2,]  0.08962021  0.8445048
## [3,] -7.63200616 -0.4699275
## [4,] -0.20201536  8.8607783

## Show the CIs
confint(anrx_lm)

##                  2.5 %     97.5 %
## (Intercept) 23.0498681 76.4923499
## Prewt        0.1128268  0.7560955
## TreatCont   -7.8754712 -0.3186599
## TreatFT      0.3060571  8.8200682

## Display the histogram of the bootstraping results
par(mfrow=c(2,2))
hist(bootstrap_estimates[,1])
hist(bootstrap_estimates[,2])
hist(bootstrap_estimates[,3])
hist(bootstrap_estimates[,4])

The revised version, for bootstrapping formula

bootstrap_function_formula  <- function(string_formula, model_data) {
    bootstrap_ids   <- sample(seq(nrow(model_data)), nrow(model_data), replace = TRUE)
    bootstrap_data <- model_data[bootstrap_ids,]
    bootstrap_model <- lm(as.formula(string_formula), bootstrap_data)
    return(bootstrap_model$coefficients)
}

The revised version, for bootstrapping CIs

bootstrap_CIs_r <- function(x) {
  CIs <- data.frame(apply(x, 2, function(x) quantile(x, 0.025)),
                    apply(x, 2, function(x) quantile(x, 0.975)))
  return(CIs)
}

The revised version, for generating the bootstrapping results

bootstrap_function_r  <- function(string_formula, model_data, ndraws=1000, CI=FALSE) {
  coeff_mtx <- t(sapply(1:ndraws, function(i) bootstrap_function_formula(string_formula, model_data)))
  if (CI) {return(bootstrap_CIs_r(coeff_mtx=coeff_mtx, level=level))}
  else {return(coeff_mtx)}
  }


bootstrap_estimates <- bootstrap_function_r(string_formula="Postwt ~ Prewt + Treat",
                                            model_data=dta, ndraws=500)
head(bootstrap_estimates)

##      (Intercept)     Prewt TreatCont  TreatFT
## [1,]    35.28652 0.6182613 -3.933824 2.960395
## [2,]    49.95680 0.4323503 -5.029876 2.635078
## [3,]    41.74531 0.5301579 -3.964060 1.727063
## [4,]    15.37227 0.8354118 -1.465516 3.330165
## [5,]    32.03421 0.6777810 -6.836423 3.326761
## [6,]    67.27215 0.1990483 -1.720711 9.259112

Display the histogram of the bootstraping results

par(mfrow=c(2,2))
hist(bootstrap_estimates[,1])
hist(bootstrap_estimates[,2])
hist(bootstrap_estimates[,3])
hist(bootstrap_estimates[,4])

Week 9 In-class exercise (Programming)

Hao-Lun Fu

2020-05-18

In-Class Exercise 1:

The original function

Run

The revised function (referred from Jay-Liao )

The function for each ci, if ci including 0 represent as gray, if not represent as red

In-Class Exercise 2: The correlation method

Load data file

The original function

The revised version, use sapply function and self-defined function to excecute the generation.

In-Class Exercise 3: The Bootstrap Estimates of Coefficients for a Multiple Linear Regression Model

Load data file

The revised version, for bootstrapping formula

The revised version, for bootstrapping CIs

The revised version, for generating the bootstrapping results

Display the histogram of the bootstraping results