511excercise1-3 and homewor3

exercise1

ci_norm <- function(n=100, nci=50, nRun=3, level=0.95, pause=0.05) {
  ci <- NULL
  z0 = qnorm(1 - (1 - level)/2) #在常態分佈中，0.975的Z-score
  for ( j in 1:nRun ) { #j依序帶入1~3(nRun)
      zbar <- colMeans(replicate(nci, rnorm(n))) 
      #zbar=產生50個常態分布的隨機數(n=100,m=0,sd=1)   
      
      zl <- zbar - z0 * 1/sqrt(n); zu <- zbar + z0 * 1/sqrt(n)
      #zl=zbar - z0*0.1 ; #zu=zbar + z0*0.1
      
      #開始繪圖
      plot(1, xlim = c(0.5, nci + 0.5), ylim = c(min(zl), max(zu)), 
           type = "n", xlab = "Sample ID", ylab = "Average") 
      abline(h = 0, lty = 3) #畫出水平軸=0
      
      for( i in 1:nci ) { #依序帶入1~50(nci)
          arrows(i, zl[i], i, zu[i], length = 0.05, angle = 90, 
               code = 3,
               #1~50按照順序一一畫出，上下界分別為zl[i]、zu[i]
               
               col = ifelse(0 > zl[i] & 0 < zu[i], "gray", "red"))
        #如果0 > zl[i] 或 0 < zu[i]，則使用灰色，否則使用紅色
        
          points(i, zbar[i], pch = 19, col = "gray")
          #原點一律灰色
          
      }
      
   }
}

## 最後更改代號(n、nci、nRun)代表的數字
ci_norm(n = 36, nci = 51, nRun = 2)

## 更改為while-loop、repeat-loop
ci_norm <- function(n=100, nci=50, nRun=3, level=0.95, pause=0.05) {
  ci <- NULL
  z0 = qnorm(1 - (1 - level)/2) #在常態分佈中，0.975的Z-score
  j <- 1
  while ( j <= nRun ) { #當j小於、等於(nRun)的時候，一一帶入
    j <- j+1
      zbar <- colMeans(replicate(nci, rnorm(n))) 
      #zbar=產生50個常態分布的隨機數(n=100,m=0,sd=1)   
      
      zl <- zbar - z0 * 1/sqrt(n); zu <- zbar + z0 * 1/sqrt(n)
      #zl=zbar - z0*0.1 ; #zu=zbar + z0*0.1
      
      #開始繪圖
      plot(1, xlim = c(0.5, nci + 0.5), ylim = c(min(zl), max(zu)), 
           type = "n", xlab = "Sample ID", ylab = "Average") 
      abline(h = 0, lty = 3) #畫出水平軸=0
      i <- 1
      repeat{ 
        if(i > nci) break  #一一帶入，當i大於50(nci)的時候，停止
        i <- i+1
        arrows(i, zl[i], i, zu[i], length = 0.05, angle = 90, 
               code = 3,
               #1~50按照順序一一畫出，上下界分別為zl[i]、zu[i]
               
               col = ifelse(0 > zl[i] & 0 < zu[i], "gray", "red"))
        #如果0 > zl[i] 或 0 < zu[i]，則使用灰色，否則使用紅色
        
          points(i, zbar[i], pch = 19, col = "black")
          #原點一律黑色
          
      }
      
   }
}

## 最後更改原本代號(n、nci、nRun)設定的數字
ci_norm(n = 36, nci = 51, nRun = 2)

exercise 2

dta <-  read.table("/Users/fightk/Desktop/hs0.txt", sep='\t', header = T)
summary(dta)

##        id            female              race         ses         schtyp   
##  Min.   :  1.00   female:109   african-amer: 20   high  :58   private: 32  
##  1st Qu.: 50.75   male  : 91   asian       : 11   low   :47   public :168  
##  Median :100.50                hispanic    : 24   middle:95                
##  Mean   :100.50                white       :145                            
##  3rd Qu.:150.25                                                            
##  Max.   :200.00                                                            
##                                                                            
##        prog          read           write            math          science     
##  academic:105   Min.   :28.00   Min.   :31.00   Min.   :33.00   Min.   :29.00  
##  general : 45   1st Qu.:44.00   1st Qu.:45.75   1st Qu.:45.00   1st Qu.:44.00  
##  vocation: 50   Median :50.00   Median :54.00   Median :52.00   Median :53.00  
##                 Mean   :52.23   Mean   :52.77   Mean   :52.65   Mean   :51.92  
##                 3rd Qu.:60.00   3rd Qu.:60.00   3rd Qu.:59.00   3rd Qu.:58.00  
##                 Max.   :76.00   Max.   :67.00   Max.   :75.00   Max.   :74.00  
##                                                                 NA's   :5      
##      socst      
##  Min.   :26.00  
##  1st Qu.:46.00  
##  Median :52.00  
##  Mean   :52.41  
##  3rd Qu.:61.00  
##  Max.   :71.00  
##

dta.asian <- subset(dta, race=="asian")
r0 <- cor(dta.asian$math, dta.asian$socst)
cnt <- 0
nIter <- 1001
for (i in 1:nIter) {
    new <- sample(dta.asian$read)
    r <- cor(new, dta.asian$math)
    if ( r0 <= r ) cnt <- cnt+1
}
cnt/nIter

## [1] 0.04295704

##其他方法使用repeat跟if
dta <- read.table("/Users/fightk/Desktop/hs0.txt", header = T)
dta.asian <- subset(dta, race=="asian")
r0 <- cor(dta.asian$math, dta.asian$socst)
cnt <- 0
nIter <- 1001
i <- 1

repeat{
  new <- sample(dta.asian$read)
  r <- cor(new, dta.asian$math)
  i=i+1
  if ( r0 <= r ) cnt <- cnt+1
  if (i>=nIter) break}
cnt/nIter

## [1] 0.02997003

##其他方法使用while
dta.asian <- subset(dta, race=="asian")


r0 <- cor(dta.asian$math, dta.asian$socst)

cnt <- 0
nIter <- 1001
i <- 0
while (i <= nIter) 
  {
  i <- i+1
    new <- sample(dta.asian$read) 
    r <- cor(new, dta.asian$math) 
    if ( r0 <= r ) cnt <- cnt+1 
}

cnt/nIter

## [1] 0.03796204

exercise 3

##
data(anorexia, package="MASS")

##
dta <- anorexia

##     
summary(dta)

##   Treat        Prewt           Postwt      
##  CBT :29   Min.   :70.00   Min.   : 71.30  
##  Cont:26   1st Qu.:79.60   1st Qu.: 79.33  
##  FT  :17   Median :82.30   Median : 84.05  
##            Mean   :82.41   Mean   : 85.17  
##            3rd Qu.:86.00   3rd Qu.: 91.55  
##            Max.   :94.90   Max.   :103.60

##
dim(dta)

## [1] 72  3

##    
row.names(dta)

##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14" "15"
## [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30"
## [31] "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44" "45"
## [46] "46" "47" "48" "49" "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" "60"
## [61] "61" "62" "63" "64" "65" "66" "67" "68" "69" "70" "71" "72"

##    
anrx_lm <- lm(Postwt ~ Prewt + Treat, data=dta)

##    
summary(anrx_lm)

## 
## Call:
## lm(formula = Postwt ~ Prewt + Treat, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.1083  -4.2773  -0.5484   5.4838  15.2922 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  49.7711    13.3910   3.717  0.00041 ***
## Prewt         0.4345     0.1612   2.695  0.00885 ** 
## TreatCont    -4.0971     1.8935  -2.164  0.03400 *  
## TreatFT       4.5631     2.1333   2.139  0.03604 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.978 on 68 degrees of freedom
## Multiple R-squared:  0.2777, Adjusted R-squared:  0.2458 
## F-statistic: 8.713 on 3 and 68 DF,  p-value: 5.719e-05

##    
coef(anrx_lm)

## (Intercept)       Prewt   TreatCont     TreatFT 
##  49.7711090   0.4344612  -4.0970655   4.5630627

## 更改成while-loop   
bootstrap_function  <- function(string_formula, nc, model_data, ndraws) {
      coeff_mtx <- matrix(0, nrow = ndraws,  ncol = nc) #coeff_mtx為矩陣
      i <- 1
      while (i < ndraws) { #抽取<ndraws次
        i <- i+1
        bootstrap_ids   <- sample(seq(nrow(model_data)), nrow(model_data), replace = TRUE)
        #從model_data中抽取，抽取後放入
        bootstrap_data <- model_data[bootstrap_ids,]
        bootstrap_model <- lm(as.formula(string_formula), bootstrap_data)
        coeff_mtx[i,]   <- bootstrap_model$coefficients
        if (i == 1) {
          print(bootstrap_model$coefficients)
        }
      }     
      return(coeff_mtx)
    }
    
##按照設定的公式輸入資料與數字
bootstrap_estimates <- bootstrap_function("Postwt ~ Prewt + Treat", nc=4, dta, 500) 

## 列出前後2.5%
bootstrap_CIs <- matrix(0, nrow = 4, ncol = 2)
    for (i in 1:4) {
      bootstrap_CIs[i,1]  <- quantile(bootstrap_estimates[,i], 0.025)
      bootstrap_CIs[i,2]  <- quantile(bootstrap_estimates[,i], 0.975)
    }
## 產出bootstrap_CIs
print(bootstrap_CIs)

##            [,1]       [,2]
## [1,] 16.4300827 75.5390022
## [2,]  0.1158243  0.8125641
## [3,] -7.7562615 -0.4632148
## [4,] -0.1893795  8.9585615

##
confint(anrx_lm)

##                  2.5 %     97.5 %
## (Intercept) 23.0498681 76.4923499
## Prewt        0.1128268  0.7560955
## TreatCont   -7.8754712 -0.3186599
## TreatFT      0.3060571  8.8200682

## 更改使用for-loop
pict <- function(j){
  pic <- par(mfrow=c(2,2))
  for (j in 1:4)
  hist(bootstrap_estimates[,j])
  return(pic)
}
pict(1)

## $mfrow
## [1] 1 1

##

exercise4

Brownian <- function(n = 11, pause = 0.05, nIter = 512, ...) {
    x = rnorm(n)
    y = rnorm(n)
    i = 1
    while (i <= nIter) {
        plot(x, y, ...)
        text(x, y, cex = 0.5)
        x = x + rnorm(n)
        y = y + rnorm(n)
        Sys.sleep(pause)
        i = i + 1
    }
} 


## for-loop
Brownian <- function(n = 11, pause = 0.05, ...) {
    x = rnorm(n) 
    y = rnorm(n) 
    i = 1
    for (i in c(1:4)) { 
        plot(x, y, ...) 
        text(x, y, cex = 0.5) 
        x = x + rnorm(n) 
        y = y + rnorm(n) 
        Sys.sleep(pause) 
        i = i + 1 
    }
} 

###

## test it (開始繪圖)
Brownian(xlim = c(-20, 20), ylim = c(-20, 20), 
    pch = 21, cex = 2, col = "cyan", bg = "lavender")

exercise 5

#
#
simSexHeight <- function(n)  {
##轉成Matrix (矩陣)，runif()形成均勻分佈的隨機數，rnorm產生隨機函數，設定列數 nrow=2
  m <- matrix(c(runif(n), rnorm(n)), ncol=2)
##if('條件'){'做A'}、else{'做B'}
  draw <- function(mr) {  # mr = one row of m
    if (mr[1] < 0.505) {
      sex <- 1
      cm <- 170 + 7*mr[2]
    } else {
      sex <- 0
      cm <- 160 + 5*mr[2] 
    }
##函式(function)可以輸入參數、執行陳述式，最後回傳輸出(return)
    return(c(sex, cm))
  }
##向量或矩陣的 “轉置” (transpose) 使用 t()，對一陣列 X 的邊際 () 執行同一函式做運算, 並回傳一向量, 陣列或列表. 其中引數 x 為所要執行之陣列物件。as.data.frame再轉成data.frame的形式
  person <- t(apply(m, 1, draw))
  person <- as.data.frame(person)
##ifelse('條件', '條件若成立：做A', '條件若不成立：做B')
  person[, 1] <- ifelse(person[, 1]==1, "M", "F")
  names(person) <- c("Gender", "Height")
  return(person)
}

##指定呈現多少欄列可以叫出設定的數據
simSexHeight(3)

###

homework3

#
# plot a color strip
#
plot.new()

plot.window(xlim = c(0, 6), ylim = c(0, 6), asp = 1)

my_cl <- c("indianred", "orange", "yellow", "forestgreen", "dodgerblue",
           "violet", "purple")
           
for(i in 1:6) rect(i-1, i-1, i, i, col = my_cl[i])

###
c <- matrix(,nrow=6,ncol=6)
for(j in 1:6){
  for(i in 1:6){
    if(j==1){
      c[i,j]=i
    }
    else{
      c[i,j]=(c[i,j-1]-1)
      if(c[i,j]<1) c[i,j]=c[i,j]+7
    }
  }
}
plot.new()
plot.window(xlim = c(0, 6), ylim = c(0, 6), asp = 1)
my_cl <- c("indianred", "orange", "yellow", "forestgreen", "dodgerblue", "violet", "purple")
for(i in 1:6) {
  for(j in 1:6) {
    cc<-c[i,j]
    rect(i-1,j-1 ,i, j, col = my_cl[cc])
  }
}