# Cushings {MASS}
# Five ways to split data by group and summarise it
# Author: KY
# Date: 2020-04-29
library(lattice)
library(dplyr)
library(tidyr)
library(ggplot2)
library(tidyverse)
#
# Cushings example
##pacman語法
library(pacman)
##開啟MASS資料
pacman::p_load(MASS, tidyverse)
# Method 1: aggregate() with a formula interface.
# The rows of Cushings are grouped by Type and the mean of each measurement
# column is computed within every group; the result is a data frame with one
# row per Type.
aggregate(
  cbind(Tetrahydrocortisone, Pregnanetriol) ~ Type,
  data = Cushings,
  FUN = mean
)
## Type Tetrahydrocortisone Pregnanetriol
## 1 a 2.966667 2.44
## 2 b 8.180000 1.12
## 3 c 19.720000 5.50
## 4 u 14.016667 1.20
# Method 2: split() + sapply().
# The Type column is removed from the data frame, the remaining measurement
# columns are split into one data frame per level of Cushings$Type, and the
# column means of each piece are computed with apply(). sapply() simplifies
# the per-group named vectors into a matrix (measurements in rows, one column
# per Type) instead of returning a list.
sapply(
  split(Cushings[, names(Cushings) != "Type"], Cushings$Type),
  function(group) apply(group, MARGIN = 2, FUN = mean)
)
## a b c u
## Tetrahydrocortisone 2.966667 8.18 19.72 14.01667
## Pregnanetriol 2.440000 1.12 5.50 1.20000
# Method 3: by() + do.call(rbind, ...).
# by() applies the function to the subset of rows belonging to each Type.
# Inside the function the Type column is dropped and the column means of the
# remaining measurements are returned as a named vector. The per-group
# vectors are converted to a plain list and stacked row-wise with rbind(),
# giving a matrix with one row per Type.
do.call(
  rbind,
  as.list(
    by(
      Cushings,
      INDICES = list(Cushings$Type),
      FUN = function(group) {
        measurements <- group[, names(group) != "Type"]
        apply(measurements, MARGIN = 2, FUN = mean)
      }
    )
  )
)
## Tetrahydrocortisone Pregnanetriol
## a 2.966667 2.44
## b 8.180000 1.12
## c 19.720000 5.50
## u 14.016667 1.20
# Method 4: dplyr pipeline.
# %>% passes the result of each step to the next one: the rows are grouped by
# Type, then summarise() collapses every group to a single row holding the
# mean of each measurement (t_m for Tetrahydrocortisone, p_m for
# Pregnanetriol). The column names must be known in advance to write the
# summary expressions.
Cushings %>%
  dplyr::group_by(Type) %>%
  dplyr::summarise(
    t_m = mean(Tetrahydrocortisone),
    p_m = mean(Pregnanetriol)
  )
## # A tibble: 4 x 3
## Type t_m p_m
## <fct> <dbl> <dbl>
## 1 a 2.97 2.44
## 2 b 8.18 1.12
## 3 c 19.7 5.5
## 4 u 14.0 1.2
# Method 5: tidyr::nest() + purrr::map().
# nest() packs every column except Type into a list-column named `data`, so
# the result has one row per Type holding that group's measurements as a
# nested tibble. The selection is passed by name (`data = -Type`): the
# unnamed form `nest(-Type)` has been deprecated since tidyr 1.0.0 and emits
# a warning on every call.
# mutate() then adds:
#   avg   - list-column; for each nested tibble, the named vector of column
#           means (computed column by column, without coercing the tibble
#           to a matrix)
#   res_1 - the "Tetrahydrocortisone" element of each avg vector
#   res_2 - the "Pregnanetriol" element of each avg vector
Cushings %>%
  nest(data = -Type) %>%
  mutate(
    avg = map(data, ~ map_dbl(.x, mean)),
    res_1 = map_dbl(avg, "Tetrahydrocortisone"),
    res_2 = map_dbl(avg, "Pregnanetriol")
  )
## # A tibble: 4 x 5
## Type data avg res_1 res_2
## <fct> <list> <list> <dbl> <dbl>
## 1 a <tibble [6 x 2]> <dbl [2]> 2.97 2.44
## 2 b <tibble [10 x 2]> <dbl [2]> 8.18 1.12
## 3 c <tibble [5 x 2]> <dbl [2]> 19.7 5.5
## 4 u <tibble [6 x 2]> <dbl [2]> 14.0 1.2
##結語:個人解讀容易度method:4 > 1 = 2 > 5 > 3
###