環境設定
pacman::p_load(tidyr,broom,magrittr,dplyr,ggplot2,nlme,kableExtra,furniture,purrr)
options(digit=4)
寫一個把台幣轉換成美元的 function
q1fxn <- function(){ NT<- readline(prompt = "Enter $NT:");
US = as.numeric(NT)*29.32;
cat(NT,"$NT","=",US,"$US",sep="")}
喚出 function
q1fxn()
## Enter $NT:
## $NT=NA$US
輸入一個值
200
## [1] 200
檢視資料,將資料存成物件
head(ChickWeight)
## Grouped Data: weight ~ Time | Chick
## weight Time Chick Diet
## 1 42 0 1 1
## 2 51 2 1 1
## 3 59 4 1 1
## 4 64 6 1 1
## 5 76 8 1 1
## 6 93 10 1 1
str(ChickWeight)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 578 obs. of 4 variables:
## $ weight: num 42 51 59 64 76 93 106 125 149 171 ...
## $ Time : num 0 2 4 6 8 10 12 14 16 18 ...
## $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
## $ Diet : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "formula")=Class 'formula' language weight ~ Time | Chick
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "outer")=Class 'formula' language ~Diet
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "labels")=List of 2
## ..$ x: chr "Time"
## ..$ y: chr "Body weight"
## - attr(*, "units")=List of 2
## ..$ x: chr "(days)"
## ..$ y: chr "(gm)"
q2dta<-ChickWeight
把資料依據 individual chicks 分組
q2split<-split(q2dta,q2dta$Chick)
嘗試用 lapply
q2cor <-sapply(q2split,function(x) lm(weight~Time,data = x)$coef)
q2cor
## 18 16 15 13 9 20 10
## (Intercept) 39 43.392857 46.83333 43.384359 52.094086 37.667826 38.695054
## Time -2 1.053571 1.89881 2.239601 2.663137 3.732718 4.066102
## 8 17 19 4 6 11
## (Intercept) 43.727273 43.030706 31.21222 32.86568 44.123431 47.921948
## Time 4.827273 4.531538 5.08743 6.08864 6.378006 7.510967
## 3 1 12 2 5 14
## (Intercept) 23.17955 24.465436 21.939797 24.724853 16.89563 20.52488
## Time 8.48737 7.987899 8.440629 8.719861 10.05536 11.98245
## 7 24 30 22 23 27
## (Intercept) 5.842535 53.067766 39.109666 40.082590 38.428074 29.858569
## Time 13.205264 1.207533 5.898351 5.877931 6.685978 7.379368
## 28 26 25 29 21 33
## (Intercept) 23.984874 20.70715 19.65119 5.882771 15.56330 45.830283
## Time 9.703676 10.10316 11.30676 12.453487 15.47512 5.855241
## 37 36 31 39 38 32
## (Intercept) 29.608834 25.85403 19.13099 17.03661 10.67282 13.69173
## Time 6.677053 9.99047 10.02617 10.73710 12.06051 13.18091
## 40 34 35 44 45 43
## (Intercept) 10.83830 5.081682 4.757979 44.909091 35.673121 52.185751
## Time 13.44229 15.000151 17.258811 6.354545 7.686432 8.318863
## 41 47 49 46 50 42
## (Intercept) 39.337922 36.489790 31.662986 27.771744 23.78218 19.86507
## Time 8.159885 8.374981 9.717894 9.738466 11.33293 11.83679
## 48
## (Intercept) 7.947663
## Time 13.714718
建立一個 function 能輸入自由度便畫出 t分配和z分配圖
q3fxn <- function(df=30){
x=seq(-3.3,3.3,0.05);
plot(x,dnorm(x),type = "l");
lines(x,dt(x,as.numeric(df),log=F),type = "l",col = "green")}
喚出 function,輸入一個自由度
q3fxn(16)
Cushings example
pacman::p_load(MASS, tidyverse)
method 1
m1 <- aggregate( . ~ Type, data = Cushings, mean)
class(m1)
## [1] "data.frame"
agregate 將data frame資料根據 type 分組,計算出統計值。輸出以變項為行,以分組為列
優點是步驟單純,直接了當,缺點是能算的 function 相對有限
method 2
m2<-sapply(split(Cushings[,-3], Cushings$Type), function(x) apply(x, 2, mean))
class(m2)
## [1] "matrix"
用 split 先將資料根據 type 分組,產生一個分組後的list
將分組資料用 sapply 丟入 function(x),而此 function 是用 apply 算輸入的資料x中,每一行的平均。
於是每組的平均被計算出來,並藉由 sapply 以array型態輸出
以變項為列,以分組為行(wide format)
method 3
m3 <- do.call("rbind", as.list(
by(Cushings, list(Cushings$Type), function(x) {
y <- subset(x, select = -Type)
apply(y, 2, mean)
}
)))
class(m3)
## [1] "matrix"
do.call 喚出 function。它的 argument 需為一個 list, 所以加入了 as.list 的指令。
by 將 function(x)套用到根據 Type 分出的各組,而它的分組依據需是一個factor 的 list, 所以用 list 指令建立之
function (x)中,subset篩選出需要帶入function 的資料範圍,然後用 apply 將 mean 指令套用到此範圍的每一行上。
function (x) 定義如何算 mean,by 定義分組。算出每一組的 mean 後,將結果轉為 List 以符合 do.call 的格式要求
do.call 用 rbind 將每組的資料疊起來,輸出一個以組為行,以變項為列的matrix.
此法缺點是麻煩,需要把很多東西轉換成 list,也需要把最後的 list 重新組合成 matrix.
method 4
m4 <- Cushings %>%
group_by(Type) %>%
summarize( t_m = mean(Tetrahydrocortisone), p_m = mean(Pregnanetriol))
此法用 pipe 讓步驟明瞭,循序漸進,亦可重新定義變項名。
缺點是一旦使用了group_by,之後此資料的處理都會分組進行。若要算整體的參數,需要用 ungroup 把資料分組解除
method 5
m5<-Cushings %>%
nest(-Type) %>%
mutate(avg = map(data, ~ apply(., 2, mean)),
res_1 = map_dbl(avg, "Tetrahydrocortisone"),
res_2 = map_dbl(avg, "Pregnanetriol"))
class(m5)
## [1] "tbl_df" "tbl" "data.frame"
此法優點為輸出多重格式,可以省去格式轉換的後續處理。缺點是需要一一定義要算的變項
寫一個可輸入樣本數、平均和標準差的 function,並輸出類似題目的圖表
q5fxn <- function(n,m,s){ns = rnorm(n,m,s);
q5avg = cumsum(ns)/1:n;
q5size = 1:n;
plot(q5size,q5avg,type = "l", col = "green",
xlab = "Sample Size",ylab = "Running average");
abline(h=m,lty="dashed");
grid()
}
測試 function
q5fxn(2000,400,30)
試寫 fxn
q6fxn <- function(data,n){ cden <- 1-(sum(diff(data[1:n,1])^2)/(2*(n-1)*var(data[1:n,1])));
sc <- sqrt((n-2)/((n-1)*(n+1))) ;
pval <- 1-pnorm(cden/sc);
list(z=cden/sc,pvalue=pval)}
試算從0到38筆資料的 c-stat
dta <- read.table("C:/for_English_path/0409Function/cstat.txt", header=T)
q6fxn(dta,38)
## $z
## [1] 4.154908
##
## $pvalue
## [1] 1.627096e-05