Cushings{MASS}

5種切割資料的語法運用

KY

2020-04-29

library(lattice)
library(dplyr)
library(tidyr)
library(ggplot2)
library(tidyverse)
#
# Cushings example
## Load pacman, a package-management helper.
library(pacman)
## Attach MASS (the package that provides the Cushings data set) together
## with tidyverse via pacman::p_load().
## NOTE(review): MASS is attached here after dplyr, so MASS::select
## presumably masks dplyr::select for later code -- call dplyr::select()
## explicitly if select() is ever needed; confirm against the rest of the file.
pacman::p_load(MASS, tidyverse)

# Method 1: aggregate() with a formula. The rows are grouped by Type and the
# mean of every measurement column is computed within each group.
aggregate(
  cbind(Tetrahydrocortisone, Pregnanetriol) ~ Type,
  data = Cushings,
  FUN = mean
)
##   Type Tetrahydrocortisone Pregnanetriol
## 1    a            2.966667          2.44
## 2    b            8.180000          1.12
## 3    c           19.720000          5.50
## 4    u           14.016667          1.20
# Method 2: split() + vapply(). split() cuts the measurement columns into one
# data frame per level of Cushings$Type, colMeans() averages every column of
# a piece, and vapply() binds the per-group mean vectors into a matrix with
# one column per Type (the result is a matrix, not a list or a plain vector).
# The Type column is dropped by name instead of by position (-3) so the call
# keeps working if the column order changes, and vapply() pins the shape of
# each per-group result, which sapply() does not guarantee.
vapply(
  split(Cushings[names(Cushings) != "Type"], Cushings$Type),
  colMeans,
  FUN.VALUE = numeric(ncol(Cushings) - 1L)
)
##                            a    b     c        u
## Tetrahydrocortisone 2.966667 8.18 19.72 14.01667
## Pregnanetriol       2.440000 1.12  5.50  1.20000
# Method 3: by() + do.call(). by() applies a function to the rows of each
# Type group; inside it the Type column is dropped and apply() averages the
# remaining columns. do.call() then hands the per-group mean vectors to
# rbind(), which stacks them row-wise into a single matrix.
do.call(
  rbind,
  as.list(
    by(
      Cushings,
      list(Cushings$Type),
      function(grp) {
        measures <- grp[, names(grp) != "Type", drop = FALSE]
        apply(measures, MARGIN = 2, FUN = mean)
      }
    )
  )
)
##   Tetrahydrocortisone Pregnanetriol
## a            2.966667          2.44
## b            8.180000          1.12
## c           19.720000          5.50
## u           14.016667          1.20
# Method 4: dplyr pipeline. %>% hands the result of each step to the next:
# group_by() groups the rows by Type, then summarise() computes the mean of
# each of the two named measurement columns within every group.
# Writing this call requires knowing the column names of the data up front.
Cushings %>%
  dplyr::group_by(Type) %>%
  dplyr::summarise(
    t_m = mean(Tetrahydrocortisone),
    p_m = mean(Pregnanetriol)
  )
## # A tibble: 4 x 3
##   Type    t_m   p_m
##   <fct> <dbl> <dbl>
## 1 a      2.97  2.44
## 2 b      8.18  1.12
## 3 c     19.7   5.5 
## 4 u     14.0   1.2
# Method 5: tidyr/purrr list-column workflow. nest() packs the non-Type
# columns of each Type group into a `data` list-column (one tibble per row).
# mutate() then adds three columns:
#   avg   - map() applies colMeans() to each nested tibble, giving a named
#           numeric vector of column means per group;
#   res_1 - map_dbl() pulls the "Tetrahydrocortisone" element out of avg;
#   res_2 - map_dbl() pulls the "Pregnanetriol" element out of avg.
# The list-column is named explicitly with nest(data = -Type): the bare
# nest(-Type) form is deprecated since tidyr 1.0.0 and emits a warning.
# colMeans() replaces apply(., 2, mean) to average the columns directly.
Cushings %>%
  nest(data = -Type) %>%
  mutate(
    avg = map(data, colMeans),
    res_1 = map_dbl(avg, "Tetrahydrocortisone"),
    res_2 = map_dbl(avg, "Pregnanetriol")
  )
## # A tibble: 4 x 5
##   Type  data              avg       res_1 res_2
##   <fct> <list>            <list>    <dbl> <dbl>
## 1 a     <tibble [6 x 2]>  <dbl [2]>  2.97  2.44
## 2 b     <tibble [10 x 2]> <dbl [2]>  8.18  1.12
## 3 c     <tibble [5 x 2]>  <dbl [2]> 19.7   5.5 
## 4 u     <tibble [6 x 2]>  <dbl [2]> 14.0   1.2
## Conclusion: personal ranking of the methods by ease of reading (easiest first): 4 > 1 = 2 > 5 > 3
###