4/9 Homework

環境設定

pacman::p_load(tidyr,broom,magrittr,dplyr,ggplot2,nlme,kableExtra,furniture,purrr)
options(digit=4)

Q1

寫一個把台幣轉換成美元的 function

    q1fxn <- function(){ NT<- readline(prompt = "Enter $NT:");
                        US = as.numeric(NT)*29.32;
                          cat(NT,"$NT","=",US,"$US",sep="")}

喚出 function

    q1fxn()

## Enter $NT:
## $NT=NA$US

輸入一個值

## [1] 200

Q2

檢視資料，將資料存成物件

    head(ChickWeight)

## Grouped Data: weight ~ Time | Chick
##   weight Time Chick Diet
## 1     42    0     1    1
## 2     51    2     1    1
## 3     59    4     1    1
## 4     64    6     1    1
## 5     76    8     1    1
## 6     93   10     1    1

    str(ChickWeight)

## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame':   578 obs. of  4 variables:
##  $ weight: num  42 51 59 64 76 93 106 125 149 171 ...
##  $ Time  : num  0 2 4 6 8 10 12 14 16 18 ...
##  $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
##  $ Diet  : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "formula")=Class 'formula'  language weight ~ Time | Chick
##   .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
##  - attr(*, "outer")=Class 'formula'  language ~Diet
##   .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
##  - attr(*, "labels")=List of 2
##   ..$ x: chr "Time"
##   ..$ y: chr "Body weight"
##  - attr(*, "units")=List of 2
##   ..$ x: chr "(days)"
##   ..$ y: chr "(gm)"

    q2dta<-ChickWeight

把資料依據 individual chicks 分組

    q2split<-split(q2dta,q2dta$Chick)

嘗試用 lapply

    q2cor <-sapply(q2split,function(x) lm(weight~Time,data = x)$coef)
    q2cor

##             18        16       15        13         9        20        10
## (Intercept) 39 43.392857 46.83333 43.384359 52.094086 37.667826 38.695054
## Time        -2  1.053571  1.89881  2.239601  2.663137  3.732718  4.066102
##                     8        17       19        4         6        11
## (Intercept) 43.727273 43.030706 31.21222 32.86568 44.123431 47.921948
## Time         4.827273  4.531538  5.08743  6.08864  6.378006  7.510967
##                    3         1        12         2        5       14
## (Intercept) 23.17955 24.465436 21.939797 24.724853 16.89563 20.52488
## Time         8.48737  7.987899  8.440629  8.719861 10.05536 11.98245
##                     7        24        30        22        23        27
## (Intercept)  5.842535 53.067766 39.109666 40.082590 38.428074 29.858569
## Time        13.205264  1.207533  5.898351  5.877931  6.685978  7.379368
##                    28       26       25        29       21        33
## (Intercept) 23.984874 20.70715 19.65119  5.882771 15.56330 45.830283
## Time         9.703676 10.10316 11.30676 12.453487 15.47512  5.855241
##                    37       36       31       39       38       32
## (Intercept) 29.608834 25.85403 19.13099 17.03661 10.67282 13.69173
## Time         6.677053  9.99047 10.02617 10.73710 12.06051 13.18091
##                   40        34        35        44        45        43
## (Intercept) 10.83830  5.081682  4.757979 44.909091 35.673121 52.185751
## Time        13.44229 15.000151 17.258811  6.354545  7.686432  8.318863
##                    41        47        49        46       50       42
## (Intercept) 39.337922 36.489790 31.662986 27.771744 23.78218 19.86507
## Time         8.159885  8.374981  9.717894  9.738466 11.33293 11.83679
##                    48
## (Intercept)  7.947663
## Time        13.714718

Q3

建立一個 function 能輸入自由度便畫出 t分配和z分配圖

    q3fxn <- function(df=30){ 
                          x=seq(-3.3,3.3,0.05);
                          plot(x,dnorm(x),type = "l");
                          lines(x,dt(x,as.numeric(df),log=F),type = "l",col = "green")}

喚出 function，輸入一個自由度

    q3fxn(16)

Q4

Cushings example

   pacman::p_load(MASS, tidyverse)

method 1

         m1 <- aggregate( . ~ Type, data = Cushings, mean)
      class(m1)

## [1] "data.frame"

  agregate 將data frame資料根據 type 分組，計算出統計值。輸出以變項為行，以分組為列
  優點是步驟單純，直接了當，缺點是能算的 function 相對有限

method 2

         m2<-sapply(split(Cushings[,-3], Cushings$Type), function(x) apply(x, 2, mean))
      class(m2)

## [1] "matrix"

用 split 先將資料根據 type 分組，產生一個分組後的list
將分組資料用 sapply 丟入 function(x)，而此 function 是用 apply 算輸入的資料x中，每一行的平均。
於是每組的平均被計算出來，並藉由 sapply 以array型態輸出
以變項為列，以分組為行(wide format)

method 3

    m3 <- do.call("rbind", as.list(
      by(Cushings, list(Cushings$Type), function(x) {
        y <- subset(x, select =  -Type)
        apply(y, 2, mean)
      }
      )))
    class(m3)

## [1] "matrix"

  do.call 喚出 function。它的 argument 需為一個 list, 所以加入了 as.list 的指令。
  by 將 function(x)套用到根據 Type 分出的各組，而它的分組依據需是一個factor 的 list, 所以用 list 指令建立之
  function (x)中，subset篩選出需要帶入function 的資料範圍，然後用 apply 將 mean 指令套用到此範圍的每一行上。
  function (x) 定義如何算 mean，by 定義分組。算出每一組的 mean 後，將結果轉為 List 以符合 do.call 的格式要求
  do.call 用 rbind 將每組的資料疊起來，輸出一個以組為行，以變項為列的matrix.
  此法缺點是麻煩，需要把很多東西轉換成 list，也需要把最後的 list 重新組合成 matrix.

method 4

    m4 <- Cushings %>%
      group_by(Type) %>%
      summarize( t_m = mean(Tetrahydrocortisone), p_m = mean(Pregnanetriol))

此法用 pipe 讓步驟明瞭，循序漸進，亦可重新定義變項名。
缺點是一旦使用了group_by，之後此資料的處理都會分組進行。若要算整體的參數，需要用 ungroup 把資料分組解除

method 5

    m5<-Cushings %>%
      nest(-Type) %>%
      mutate(avg = map(data, ~ apply(., 2, mean)), 
             res_1 = map_dbl(avg, "Tetrahydrocortisone"), 
             res_2 = map_dbl(avg, "Pregnanetriol"))   
    class(m5)

## [1] "tbl_df"     "tbl"        "data.frame"

  此法優點為輸出多重格式，可以省去格式轉換的後續處理。缺點是需要一一定義要算的變項

Q5

寫一個可輸入樣本數、平均和標準差的 function，並輸出類似題目的圖表

    q5fxn <- function(n,m,s){ns = rnorm(n,m,s);
                            q5avg = cumsum(ns)/1:n;
                            q5size = 1:n;
                            plot(q5size,q5avg,type = "l", col = "green",
                            xlab = "Sample Size",ylab = "Running average");
                            abline(h=m,lty="dashed");
                            grid()
    }

測試 function

    q5fxn(2000,400,30)

Q6

試寫 fxn

      q6fxn <- function(data,n){ cden <- 1-(sum(diff(data[1:n,1])^2)/(2*(n-1)*var(data[1:n,1])));
                         sc <- sqrt((n-2)/((n-1)*(n+1))) ;
                         pval <- 1-pnorm(cden/sc);
                         list(z=cden/sc,pvalue=pval)}

試算從0到38筆資料的 c-stat

      dta <- read.table("C:/for_English_path/0409Function/cstat.txt", header=T)
      
      q6fxn(dta,38)

## $z
## [1] 4.154908
## 
## $pvalue
## [1] 1.627096e-05

4/9 Homework

莊耘

16 April 2018

Q1

Q2

Q3

Q4

Q5

Q6

The End