# Cushings example
library(MASS)
library(tidyr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:MASS':
## 
##     select

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(purrr)
library(lattice)

使用 `apply(Cushings[, -3], 2, mean)` 指令，就可以列出 Tetrahydrocortisone 和 Pregnanetriol 的整體平均，但是無法依 Type 分類；以下的方法都是先依 Type 分類後再計算平均。

method 1

# Average both hormone measurements within each level of Type.
# The formula interface of aggregate() keeps this to a single short call.
aggregate(
  cbind(Tetrahydrocortisone, Pregnanetriol) ~ Type,
  data = Cushings,
  FUN = mean
)

##   Type Tetrahydrocortisone Pregnanetriol
## 1    a            2.966667          2.44
## 2    b            8.180000          1.12
## 3    c           19.720000          5.50
## 4    u           14.016667          1.20

# The aggregated result already has one row per Type, so it can be passed
# straight to a lattice plot conditioned on Type.
dta <- aggregate(
  cbind(Tetrahydrocortisone, Pregnanetriol) ~ Type,
  data = Cushings,
  FUN = mean
)
stripplot(Pregnanetriol ~ Tetrahydrocortisone | Type, data = dta)

method 2

# Split the two measurement columns into one data frame per Type, then
# take the mean of every column inside each piece; sapply() binds the
# per-Type mean vectors into a matrix (variables in rows, Types in columns).
sapply(
  split(Cushings[c("Tetrahydrocortisone", "Pregnanetriol")], Cushings$Type),
  function(grp) vapply(grp, mean, numeric(1)) # one mean per column
)

##                            a    b     c        u
## Tetrahydrocortisone 2.966667 8.18 19.72 14.01667
## Pregnanetriol       2.440000 1.12  5.50  1.20000

# 如果想依照Type分類繪圖，需要再重整資料

method 3

# by() yields one vector of column means per Type; converting that result
# to a list and row-binding it gives a matrix with one row per Type.
do.call(
  "rbind",
  as.list(
    by(Cushings, list(Cushings$Type), function(grp) {
      # Drop the grouping column so only the numeric measurements remain.
      measures <- grp[, names(grp) != "Type", drop = FALSE]
      # Mean of each remaining column.
      vapply(measures, mean, numeric(1))
    })
  )
)

##   Tetrahydrocortisone Pregnanetriol
## a            2.966667          2.44
## b            8.180000          1.12
## c           19.720000          5.50
## u           14.016667          1.20

# 如果想依照Type分類繪圖，需要再創立Type變項

method 4

# Group the rows by Type, then let summarize() report the mean of each
# measurement under a new, shorter column name.
summarize(
  group_by(Cushings, Type),
  t_m = mean(Tetrahydrocortisone),
  p_m = mean(Pregnanetriol)
)

## # A tibble: 4 x 3
##   Type    t_m   p_m
##   <fct> <dbl> <dbl>
## 1 a      2.97  2.44
## 2 b      8.18  1.12
## 3 c     19.7   5.5 
## 4 u     14.0   1.2

# 指令方便，且依照Type做分類，並且能在操作的過程中更改變項名稱

method 5

# Nest the two measurement columns into a list-column called `data`, leaving
# one row per Type. The nested columns are given an explicit name
# (`data = c(...)`) because tidyr >= 1.0 warns when the selection passed to
# nest() is unnamed, as it was with the older `nest(-Type)` form.
Cushings %>%
  nest(data = c(Tetrahydrocortisone, Pregnanetriol)) %>%
  mutate(
    # Named vector of column means for each Type's nested data frame.
    avg = map(data, colMeans),
    # Pull the individual means out of `avg` by name into numeric columns.
    res_1 = map_dbl(avg, "Tetrahydrocortisone"),
    res_2 = map_dbl(avg, "Pregnanetriol")
  )

## Warning: All elements of `...` must be named.
## Did you want `data = c(Tetrahydrocortisone, Pregnanetriol)`?

## # A tibble: 4 x 5
##   Type  data              avg       res_1 res_2
##   <fct> <list>            <list>    <dbl> <dbl>
## 1 a     <tibble [6 x 2]>  <dbl [2]>  2.97  2.44
## 2 b     <tibble [10 x 2]> <dbl [2]>  8.18  1.12
## 3 c     <tibble [5 x 2]>  <dbl [2]> 19.7   5.5 
## 4 u     <tibble [6 x 2]>  <dbl [2]> 14.0   1.2

# 依照Type做分類，因為有使用nest指令，除了計算平均外，也能看到每個Type的資料個數

2020-04-27-In-class-3

Chang, C. Y.

2020-04-28

method 1

method 2

method 3

method 4

method 5