Explain what does this statement do: lapply(lapply(search(), ls), length)
# 用於列出工作目錄中存在的所有object的名稱
ls()
character(0)
# 用於獲取R現有連結的package
search()
[1] ".GlobalEnv" "package:stats" "package:graphics"
[4] "package:grDevices" "package:utils" "package:datasets"
[7] "package:methods" "Autoloads" "package:base"
# 計算每一package中 function list 的長度
lapply(lapply(search(), ls), length)
[[1]]
[1] 0
[[2]]
[1] 449
[[3]]
[1] 87
[[4]]
[1] 113
[[5]]
[1] 247
[[6]]
[1] 104
[[7]]
[1] 203
[[8]]
[1] 0
[[9]]
[1] 1255
Convert the R script in the NZ schools example into a rmarkdown file and provide comments to each code chunk indicated by ‘##’. Give alternative code to perform the same calculation where appropriate.
## keep the school names with white spaces
<- read.csv("nzSchools.csv", as.is=2) #as.is??
dta
## data structure
str(dta)
'data.frame': 2571 obs. of 6 variables:
$ ID : int 1015 1052 1062 1092 1130 1018 1029 1030 1588 1154 ...
$ Name: chr "Hora Hora School" "Morningside School" "Onerahi School" "Raurimu Avenue School" ...
$ City: Factor w/ 541 levels "Ahaura","Ahipara",..: 533 533 533 533 533 533 533 533 533 533 ...
$ Auth: Factor w/ 4 levels "Other","Private",..: 3 3 3 3 3 3 3 3 4 3 ...
$ Dec : int 2 3 4 2 4 8 5 5 6 1 ...
$ Roll: int 318 200 455 86 577 329 637 395 438 201 ...
## dimension of the data
dim(dta)
[1] 2571 6
## 新建Size欄位,大於Roll中位數的為"Large"小於等於為"Small"
$Size <- ifelse(dta$Roll > median(dta$Roll), "Large", "Small")
dta
## 刪除size欄位
$Size <- NULL
dta
## 顯示資料前六筆
head(dta)
ID Name City Auth Dec Roll
1 1015 Hora Hora School Whangarei State 2 318
2 1052 Morningside School Whangarei State 3 200
3 1062 Onerahi School Whangarei State 4 455
4 1092 Raurimu Avenue School Whangarei State 2 86
5 1130 Whangarei School Whangarei State 4 577
6 1018 Hurupaki School Whangarei State 8 329
## 將Roll平均切三等份,分別為small, mediam, large
$Size <- cut(dta$Roll, 3, labels=c("Small", "Mediam", "Large"))
dta
## 顯示size的count
table(dta$Size)
Small Mediam Large
2555 15 1
## order函數返回的值表示位置,並以降序排列
$RollOrd <- order(dta$Roll, decreasing=T)
dta
## 顯示RollOrd 前六筆位置資料
## 第一筆RollOrd為1726,意思是要找第1726筆資料,才是第一列
head(dta[dta$RollOrd, ])
ID Name City Auth Dec Roll Size RollOrd
1726 498 Correspondence School Wellington State NA 5546 Large 753
301 28 Rangitoto College Auckland State 10 3022 Mediam 353
376 78 Avondale College Auckland State 4 2613 Mediam 712
2307 319 Burnside High School Christchurch State 8 2588 Mediam 709
615 41 Macleans College Auckland State 10 2476 Mediam 1915
199 43 Massey High School Auckland State 5 2452 Mediam 1683
# 顯示RollOrd 後六筆位置資料
tail(dta[dta$RollOrd, ])
ID Name City Auth Dec Roll Size
2401 1641 Amana Christian School Dunedin Private 9 7 Small
1590 2461 Tangimoana School Manawatu State 4 6 Small
1996 3598 Woodbank School Kaikoura State 4 6 Small
2112 3386 Jacobs River School Jacobs River State 5 6 Small
1514 2407 Ngamatapouri School Sth Taranaki District State 9 5 Small
1575 2420 Papanui Junction School Taihape State 5 5 Small
RollOrd
2401 2562
1590 266
1996 2478
2112 1501
1514 2377
1575 1542
## 根據city和Roll由大到小(z to A)排序,並顯示前六筆
head(dta[order(dta$City, dta$Roll, decreasing=T), ])
ID Name City Auth Dec Roll Size RollOrd
2548 401 Menzies College Wyndham State 4 356 Small 859
2549 4054 Wyndham School Wyndham State 5 94 Small 1163
1611 2742 Woodville School Woodville State 3 147 Small 726
1630 2640 Papatawa School Woodville State 7 27 Small 2273
2041 3600 Woodend School Woodend State 9 375 Small 1401
1601 399 Central Southland College Winton State 7 549 Small 450
## 根據city和Roll由大到小排序,並顯示後六筆
tail(dta[order(dta$City, dta$Roll, decreasing=T), ])
ID Name City Auth Dec Roll Size RollOrd
2169 3273 Albury School Albury State 8 30 Small 1010
2018 350 Akaroa Area School Akaroa State 8 125 Small 1051
2023 3332 Duvauchelle School Akaroa State 9 41 Small 749
335 1200 Ahuroa School Ahuroa State 7 22 Small 193
99 1000 Ahipara School Ahipara State 3 241 Small 1963
2117 2105 Awahono School - Grey Valley Ahaura State 4 119 Small 364
## 只呈現Roll由大到小的數值 integer
head(sort(dta$Roll, decreasing=T))
[1] 5546 3022 2613 2588 2476 2452
tail(sort(dta$Roll, decreasing=T))
[1] 7 6 6 6 5 5
## 只顯示經排序後前六個city name
head(sort(dta$City, dta$Roll, decreasing=T))
[1] Wyndham Wyndham Woodville Woodville Woodend Winton
541 Levels: Ahaura Ahipara Ahuroa Akaroa Albury Alexandra Alfriston ... Wyndham
## one varible
::p_load(dplyr)
pacmanhead(arrange(dta,desc(Roll)))
ID Name City Auth Dec Roll Size RollOrd
1 498 Correspondence School Wellington State NA 5546 Large 753
2 28 Rangitoto College Auckland State 10 3022 Mediam 353
3 78 Avondale College Auckland State 4 2613 Mediam 712
4 319 Burnside High School Christchurch State 8 2588 Mediam 709
5 41 Macleans College Auckland State 10 2476 Mediam 1915
6 43 Massey High School Auckland State 5 2452 Mediam 1683
tail(arrange(dta,desc(Roll)))
ID Name City Auth Dec Roll Size
2566 1641 Amana Christian School Dunedin Private 9 7 Small
2567 2461 Tangimoana School Manawatu State 4 6 Small
2568 3598 Woodbank School Kaikoura State 4 6 Small
2569 3386 Jacobs River School Jacobs River State 5 6 Small
2570 2407 Ngamatapouri School Sth Taranaki District State 9 5 Small
2571 2420 Papanui Junction School Taihape State 5 5 Small
RollOrd
2566 2562
2567 266
2568 2478
2569 1501
2570 2377
2571 1542
## groupby
|> group_by(City) |> arrange(desc(Roll)) |> head() dta
# A tibble: 6 x 8
# Groups: City [3]
ID Name City Auth Dec Roll Size RollOrd
<int> <chr> <fct> <fct> <int> <int> <fct> <int>
1 498 Correspondence School Wellington State NA 5546 Large 753
2 28 Rangitoto College Auckland State 10 3022 Mediam 353
3 78 Avondale College Auckland State 4 2613 Mediam 712
4 319 Burnside High School Christchurch State 8 2588 Mediam 709
5 41 Macleans College Auckland State 10 2476 Mediam 1915
6 43 Massey High School Auckland State 5 2452 Mediam 1683
## 顯示Auth的列聯表
table(dta$Auth)
Other Private State State Integrated
1 99 2144 327
## 將列聯表存成authtbl,並顯示authtbl
<- table(dta$Auth); authtbl authtbl
Other Private State State Integrated
1 99 2144 327
## 顯示authtbl的類型
class(authtbl)
[1] "table"
## 顯示Auth為other的資料
$Auth == "Other", ] dta[dta
ID Name City Auth Dec Roll Size RollOrd
2315 518 Kingslea School Christchurch Other 1 51 Small 1579
## xtabs 顯示Auth與Dec 2x2列聯表
xtabs(~ Auth + Dec, data=dta)
Dec
Auth 1 2 3 4 5 6 7 8 9 10
Other 1 0 0 0 0 0 0 0 0 0
Private 0 0 2 6 2 2 6 11 12 38
State 259 230 208 219 214 215 188 200 205 205
State Integrated 12 22 35 28 38 34 45 45 37 31
## ftable 與xtable結果相同
with(dta,ftable(Auth,Dec))
Dec 1 2 3 4 5 6 7 8 9 10
Auth
Other 1 0 0 0 0 0 0 0 0 0
Private 0 0 2 6 2 2 6 11 12 38
State 259 230 208 219 214 215 188 200 205 205
State Integrated 12 22 35 28 38 34 45 45 37 31
## 計算Roll的平均值
mean(dta$Roll)
[1] 295.4737
## 計算Auth=Private之樣本Roll的平均值
mean(dta$Roll[dta$Auth == "Private"])
[1] 308.798
## 計算不同種類Auth之Roll的平均值
aggregate(dta["Roll"], by=list(dta$Auth), FUN=mean)
Group.1 Roll
1 Other 51.0000
2 Private 308.7980
3 State 300.6301
4 State Integrated 258.3792
## group_by and summarise
|> group_by(Auth) |> summarise(mean = mean(Roll)) dta
# A tibble: 4 x 2
Auth mean
<fct> <dbl>
1 Other 51
2 Private 309.
3 State 301.
4 State Integrated 258.
## Dec > 5 則Rich為True,反之為False
$Rich <- dta$Dec > 5; dta$Rich dta
## 計算Auth 與Rich 分層後的Roll的平均值
## aggregate似乎略過了Rich=NA?
aggregate(dta["Roll"], by=list(dta$Auth, dta$Rich), FUN=mean)
Group.1 Group.2 Roll
1 Other FALSE 51.0000
2 Private FALSE 151.4000
3 State FALSE 261.7487
4 State Integrated FALSE 183.2370
5 Private TRUE 402.5362
6 State TRUE 338.8243
7 State Integrated TRUE 311.2135
## group_by and summarise
|> group_by(Auth, Rich) |> summarise(mean = mean(Roll)) dta
# A tibble: 9 x 3
# Groups: Auth [4]
Auth Rich mean
<fct> <lgl> <dbl>
1 Other FALSE 51
2 Private FALSE 151.
3 Private TRUE 403.
4 Private NA 64.1
5 State FALSE 262.
6 State TRUE 339.
7 State NA 5546
8 State Integrated FALSE 183.
9 State Integrated TRUE 311.
## 顯示Auth各種類之Roll的range
by(dta["Roll"], INDICES=list(dta$Auth), FUN=range)
: Other
[1] 51 51
------------------------------------------------------------
: Private
[1] 7 1663
------------------------------------------------------------
: State
[1] 5 5546
------------------------------------------------------------
: State Integrated
[1] 18 1475
Split the ChickWeight{datasets} data by individual chicks to extract separate slope estimates of regressing weight onto Time for each chick.
<- ChickWeight
dta3 head(dta3)
weight Time Chick Diet
1 42 0 1 1
2 51 2 1 1
3 59 4 1 1
4 64 6 1 1
5 76 8 1 1
6 93 10 1 1
# Chick level重新排序
$Chick <- factor(as.numeric(as.character(dta3$Chick)))
dta3levels(dta3$Chick)
[1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15"
[16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30"
[31] "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44" "45"
[46] "46" "47" "48" "49" "50"
# split and lapply
split(dta3, dta3$Chick)|> #將資料依Chick進行dataset切割
lapply(function(x) lm(weight ~ Time, data = x)) |> sapply(coef) #將Chick的subdata逐一放入進行lm,並使用sapply產生coef
1 2 3 4 5 6 7
(Intercept) 24.465436 24.724853 23.17955 32.86568 16.89563 44.123431 5.842535
Time 7.987899 8.719861 8.48737 6.08864 10.05536 6.378006 13.205264
8 9 10 11 12 13
(Intercept) 43.727273 52.094086 38.695054 47.921948 21.939797 43.384359
Time 4.827273 2.663137 4.066102 7.510967 8.440629 2.239601
14 15 16 17 18 19 20
(Intercept) 20.52488 46.83333 43.392857 43.030706 39 31.21222 37.667826
Time 11.98245 1.89881 1.053571 4.531538 -2 5.08743 3.732718
21 22 23 24 25 26 27
(Intercept) 15.56330 40.082590 38.428074 53.067766 19.65119 20.70715 29.858569
Time 15.47512 5.877931 6.685978 1.207533 11.30676 10.10316 7.379368
28 29 30 31 32 33 34
(Intercept) 23.984874 5.882771 39.109666 19.13099 13.69173 45.830283 5.081682
Time 9.703676 12.453487 5.898351 10.02617 13.18091 5.855241 15.000151
35 36 37 38 39 40 41
(Intercept) 4.757979 25.85403 29.608834 10.67282 17.03661 10.83830 39.337922
Time 17.258811 9.99047 6.677053 12.06051 10.73710 13.44229 8.159885
42 43 44 45 46 47
(Intercept) 19.86507 52.185751 44.909091 35.673121 27.771744 36.489790
Time 11.83679 8.318863 6.354545 7.686432 9.738466 8.374981
48 49 50
(Intercept) 7.947663 31.662986 23.78218
Time 13.714718 9.717894 11.33293
|> group_by(dta3$Chick) |>
dta3 do ({res<-lm(weight ~ Time, data=.)
::tidy(coef(res))
broom
} )
# A tibble: 100 x 3
# Groups: dta3$Chick [50]
`dta3$Chick` names x
<fct> <chr> <dbl>
1 1 (Intercept) 24.5
2 1 Time 7.99
3 2 (Intercept) 24.7
4 2 Time 8.72
5 3 (Intercept) 23.2
6 3 Time 8.49
7 4 (Intercept) 32.9
8 4 Time 6.09
9 5 (Intercept) 16.9
10 5 Time 10.1
# ... with 90 more rows
library(dplyr)
par(mfrow=c(3,3))
lapply(split(dta3, dta3$Chick), function(x){
plot(x$weight ~ x$Time,
xlab="Time (day)",
ylab="Weight (gm)")
legend('bottomright',
paste("Chick", x$Chick[1], sep="="),
bty='n')
abline(lm(x$weight ~ x$Time),
lwd=1,
col="firebrick")
})
library(ggplot2)
<- dta3[, -c(3,4)] #定住總體回歸線用
datshade ggplot(dta3, aes(x=Time, y=weight))+
geom_point()+
geom_smooth(method= lm, formula= y~x, se=F)+
geom_line(datshade, mapping=aes(x=Time, y=weight),
stat="smooth",method = "lm", formula = y ~ x,
se = FALSE, color="firebrick", alpha=0.5)+# 畫總體回歸線
facet_wrap(.~Chick, ncol=10, nrow=5)+
theme_minimal()
# a case study - II
##
<- read.table("NCEA2007.txt", sep=":", quote="", h=T, as.is=T) #as.is=T, Name為chr;#as.is=F, Name為factor;
dta4
## row x column
dim(dta4)
[1] 88 4
## 資料結構
str(dta4)
'data.frame': 88 obs. of 4 variables:
$ Name : chr "Al-Madinah School" "Alfriston College" "Ambury Park Centre for Riding Therapy" "Aorere College" ...
$ Level1: num 61.5 53.9 33.3 39.5 71.2 22.1 50.8 57.3 89.3 59.8 ...
$ Level2: num 75 44.1 20 50.2 78.9 30.8 34.8 49.8 89.7 65.7 ...
$ Level3: num 0 0 0 30.6 55.5 26.3 48.9 44.6 88.6 50.4 ...
## 顯示前六筆
head(dta4)
Name Level1 Level2 Level3
1 Al-Madinah School 61.5 75.0 0.0
2 Alfriston College 53.9 44.1 0.0
3 Ambury Park Centre for Riding Therapy 33.3 20.0 0.0
4 Aorere College 39.5 50.2 30.6
5 Auckland Girls' Grammar School 71.2 78.9 55.5
6 Auckland Grammar 22.1 30.8 26.3
## 計算扣除colum1後,每個column的mean
## output 為 numeric
<-apply(dta4[, -1], MARGIN=2, FUN=mean) #MARGIN=2, columns;MARGIN=1, rows;
a a
Level1 Level2 Level3
62.26705 61.06818 47.97614
class(a)
[1] "numeric"
## list apply
## output 為 list
<-lapply(dta4[, -1], FUN=mean)
b b
$Level1
[1] 62.26705
$Level2
[1] 61.06818
$Level3
[1] 47.97614
class(b)
[1] "list"
## simplify the list apply
## output 為 numeric
<-sapply(dta4[, -1], FUN=mean)
c c
Level1 Level2 Level3
62.26705 61.06818 47.97614
class(c)
[1] "numeric"
## 計算扣除colum1後,每個column的range
## output 為 "matrix" "array"
<-apply(dta4[, -1], MARGIN=2, FUN=range)
d d
Level1 Level2 Level3
[1,] 2.8 0.0 0.0
[2,] 97.4 95.7 95.7
class(d)
[1] "matrix" "array"
## output 為 list
<-lapply(dta4[, -1], FUN=range)
e e
$Level1
[1] 2.8 97.4
$Level2
[1] 0.0 95.7
$Level3
[1] 0.0 95.7
class(e)
[1] "list"
## output 為 "matrix" "array
<-sapply(dta4[, -1], FUN=range)
f f
Level1 Level2 Level3
[1,] 2.8 0.0 0.0
[2,] 97.4 95.7 95.7
class(f)
[1] "matrix" "array"
## subset data by level of Roll and keep value of Auth
<- split(dta$Roll, dta$Auth)
rollsByAuth
## 資料結構
str(rollsByAuth)
List of 4
$ Other : int 51
$ Private : int [1:99] 255 39 154 73 83 25 95 85 94 729 ...
$ State : int [1:2144] 318 200 455 86 577 329 637 395 201 267 ...
$ State Integrated: int [1:327] 438 26 191 560 151 114 126 171 211 57 ...
## 資料型態為list
class(rollsByAuth)
[1] "list"
## output 為list
<-lapply(split(dta$Roll, dta$Auth), mean)
g g
$Other
[1] 51
$Private
[1] 308.798
$State
[1] 300.6301
$`State Integrated`
[1] 258.3792
class(g)
[1] "list"
## output 為 numeric
<-sapply(split(dta$Roll, dta$Auth), mean)
h h
Other Private State State Integrated
51.0000 308.7980 300.6301 258.3792
class(h)
[1] "numeric"