Nếu đó là công việc quan trọng - hãy tự mình làm lấy

library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1       ✔ purrr   0.3.2  
## ✔ tibble  2.1.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.3       ✔ stringr 1.4.0  
## ✔ readr   1.3.1       ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

1 Giới thiệu

Không có package nào khiến tôi phải đọc nhiều như purrr, vì nó quá hay và quá thông minh.

2 as_mapper()

Chuyển 1 object thành 1 mapper function

  • Là 1 cách viết khác của hàm
# Plain two-argument function: returns the sum of its inputs.
f_add <- function(x, y){x+y}

# The same addition expressed as a formula lambda; as_mapper() turns it into a function.
addition <- as_mapper(~ .x + .y)
# Auto-print to inspect the function that as_mapper() generated.
addition
## <lambda>
## function (..., .x = ..1, .y = ..2, . = ..1) 
## .x + .y
## attr(,"class")
## [1] "rlang_lambda_function"
# Scalar calls: the plain function and the mapper give the same sum.
f_add(x = 3, y = 4)
## [1] 7
addition(3, 4)
## [1] 7
# Pairwise addition over two integer vectors works with either callable.
map2_int(.x = 1:3, .y = 4:6, .f = addition)
## [1] 5 7 9
map2_int(.x = 1:3, .y = 4:6, .f = f_add)
## [1] 5 7 9
# Passing an existing function to as_mapper() hands it back unchanged.
as_mapper(.f = f_add)
## function(x, y){x+y}
## <bytecode: 0x55cabf439268>
  • Đơn giản hơn khi chuyển đổi 1 object thành 1 hàm
# as_mapper("r.squared") builds an extractor for the element named "r.squared".
# compose() applies its functions from last to first (the default, spelled out
# here): fit with lm(), summarise the fit, then pull out R-squared.
get_rsquared <- compose(
  as_mapper("r.squared"),
  summary,
  lm,
  .dir = "backward"
)

get_rsquared(formula = Sepal.Length ~ Sepal.Width, data = iris)
## [1] 0.01382265
# Three standard-normal samples of increasing size, drawn in the order 10, 100, 1000.
l <- map(c(10, 100, 1000), rnorm)
# pmap_dbl() walks the list of samples while recycling the scalars 0.1 and TRUE;
# inside the lambda ..1 is the sample, ..2 the trim fraction, ..3 the na.rm flag.
pmap_dbl(list(l, 0.1, TRUE), ~ mean(..1, trim = ..2, na.rm = ..3))
## [1] -0.37707235  0.05169313 -0.02798717

3 rerun()

Chạy lặp lại 1 biểu thức n lần

# Draw 20 training samples; rerun() re-evaluates the expression each time, so
# every sample is a fresh random 80% of the airquality rows.
train <- rerun(20, sample_frac(airquality, size = 0.8))
# For each training sample, the validation set is the airquality rows it did not pick.
# Naming the join columns explicitly (the same columns a natural join would use)
# keeps anti_join() from printing a "Joining, by = ..." message for each of the 20 splits.
validation <- map(train, ~ anti_join(airquality, .x, by = names(airquality)))

Kiểm tra quy mô mẫu validation

# TRUE only if every one of the validation sets holds exactly 31 rows.
every(map_int(validation, nrow), ~ .x == 31)
## [1] TRUE

4 map_if()

Chỉ áp dụng hàm cho những phần tử thỏa mãn điều kiện (predicate); các phần tử không thỏa mãn điều kiện được giữ nguyên như đầu vào

Kiểm định xem 1 biến có phân phối chuẩn hay không

# Run the Shapiro-Wilk test on the numeric columns of iris; other columns pass through untouched.
map_if(.x = iris, .p = is.numeric, .f = shapiro.test)
## $Sepal.Length
## 
##  Shapiro-Wilk normality test
## 
## data:  .x[[i]]
## W = 0.97609, p-value = 0.01018
## 
## 
## $Sepal.Width
## 
##  Shapiro-Wilk normality test
## 
## data:  .x[[i]]
## W = 0.98492, p-value = 0.1012
## 
## 
## $Petal.Length
## 
##  Shapiro-Wilk normality test
## 
## data:  .x[[i]]
## W = 0.87627, p-value = 7.412e-10
## 
## 
## $Petal.Width
## 
##  Shapiro-Wilk normality test
## 
## data:  .x[[i]]
## W = 0.90183, p-value = 1.68e-08
## 
## 
## $Species
##   [1] setosa     setosa     setosa     setosa     setosa     setosa    
##   [7] setosa     setosa     setosa     setosa     setosa     setosa    
##  [13] setosa     setosa     setosa     setosa     setosa     setosa    
##  [19] setosa     setosa     setosa     setosa     setosa     setosa    
##  [25] setosa     setosa     setosa     setosa     setosa     setosa    
##  [31] setosa     setosa     setosa     setosa     setosa     setosa    
##  [37] setosa     setosa     setosa     setosa     setosa     setosa    
##  [43] setosa     setosa     setosa     setosa     setosa     setosa    
##  [49] setosa     setosa     versicolor versicolor versicolor versicolor
##  [55] versicolor versicolor versicolor versicolor versicolor versicolor
##  [61] versicolor versicolor versicolor versicolor versicolor versicolor
##  [67] versicolor versicolor versicolor versicolor versicolor versicolor
##  [73] versicolor versicolor versicolor versicolor versicolor versicolor
##  [79] versicolor versicolor versicolor versicolor versicolor versicolor
##  [85] versicolor versicolor versicolor versicolor versicolor versicolor
##  [91] versicolor versicolor versicolor versicolor versicolor versicolor
##  [97] versicolor versicolor versicolor versicolor virginica  virginica 
## [103] virginica  virginica  virginica  virginica  virginica  virginica 
## [109] virginica  virginica  virginica  virginica  virginica  virginica 
## [115] virginica  virginica  virginica  virginica  virginica  virginica 
## [121] virginica  virginica  virginica  virginica  virginica  virginica 
## [127] virginica  virginica  virginica  virginica  virginica  virginica 
## [133] virginica  virginica  virginica  virginica  virginica  virginica 
## [139] virginica  virginica  virginica  virginica  virginica  virginica 
## [145] virginica  virginica  virginica  virginica  virginica  virginica 
## Levels: setosa versicolor virginica

5 Phân tích tương quan

Liệt kê tất cả các tổ hợp gồm 2 tên biến

library(tidystringdist) # Works since v0.1.2 
# Build the table of column-name pairs (columns V1 and V2) from airquality's names.
comb <- airquality %>%
  names() %>%
  tidy_comb_all()
# Render the pairs as a table.
knitr::kable(x = comb)
V1 V2
Ozone Solar.R
Ozone Wind
Ozone Temp
Ozone Month
Ozone Day
Solar.R Wind
Solar.R Temp
Solar.R Month
Solar.R Day
Wind Temp
Wind Month
Wind Day
Temp Month
Temp Day
Month Day

Bảng các hệ số tương quan và kiểm định ý nghĩa của các hệ số

# For each (V1, V2) pair in comb: run cor.test() on the two airquality columns,
# flatten every test result into one row with broom::tidy, stack the rows, and
# put the pair names in front of the statistics.
bulk_cor <- cbind(
  comb,
  map_df(
    map2(comb$V1, comb$V2, ~ cor.test(airquality[[.x]], airquality[[.y]])),
    broom::tidy
  )
)

knitr::kable(x = bulk_cor, digits = 3)
V1 V2 estimate statistic p.value parameter conf.low conf.high method alternative
Ozone Solar.R 0.348 3.880 0.000 109 0.173 0.502 Pearson’s product-moment correlation two.sided
Ozone Wind -0.602 -8.040 0.000 114 -0.706 -0.471 Pearson’s product-moment correlation two.sided
Ozone Temp 0.698 10.418 0.000 114 0.591 0.781 Pearson’s product-moment correlation two.sided
Ozone Month 0.165 1.781 0.078 114 -0.018 0.337 Pearson’s product-moment correlation two.sided
Ozone Day -0.013 -0.141 0.888 114 -0.195 0.169 Pearson’s product-moment correlation two.sided
Solar.R Wind -0.057 -0.683 0.496 144 -0.217 0.107 Pearson’s product-moment correlation two.sided
Solar.R Temp 0.276 3.444 0.001 144 0.119 0.419 Pearson’s product-moment correlation two.sided
Solar.R Month -0.075 -0.906 0.366 144 -0.235 0.088 Pearson’s product-moment correlation two.sided
Solar.R Day -0.150 -1.824 0.070 144 -0.305 0.012 Pearson’s product-moment correlation two.sided
Wind Temp -0.458 -6.331 0.000 151 -0.575 -0.323 Pearson’s product-moment correlation two.sided
Wind Month -0.178 -2.227 0.027 151 -0.328 -0.020 Pearson’s product-moment correlation two.sided
Wind Day 0.027 0.334 0.739 151 -0.132 0.185 Pearson’s product-moment correlation two.sided
Temp Month 0.421 5.703 0.000 151 0.281 0.543 Pearson’s product-moment correlation two.sided
Temp Day -0.131 -1.619 0.108 151 -0.283 0.029 Pearson’s product-moment correlation two.sided
Month Day -0.008 -0.098 0.922 151 -0.166 0.151 Pearson’s product-moment correlation two.sided
LS0tCnRpdGxlOiAiSMaw4bubbmcgZOG6q24gc+G7rSBk4bulbmcgcHVycnIgcGFja2FnZSBwYXJ0IDUiCmF1dGhvcjogIk5ndXnhu4VuIE5n4buNYyBCw6xuaCIKZGF0ZTogIjMwIEFwciAyMDE5IgpvdXRwdXQ6CiAgIGh0bWxfZG9jdW1lbnQ6IAogICAgY29kZV9kb3dubG9hZDogdHJ1ZQogICAgIyBjb2RlX2ZvbGRpbmc6IGhpZGUKICAgIG51bWJlcl9zZWN0aW9uczogeWVzCiAgICB0aGVtZTogImRlZmF1bHQiCiAgICB0b2M6IFRSVUUKICAgIHRvY19mbG9hdDogVFJVRQogICAgZGV2OiAnc3ZnJwotLS0KCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUpCgpgYGAKCj4gTuG6v3UgxJHDsyBsw6AgY8O0bmcgdmnhu4djIHF1YW4gdHLhu41uZyAtIGjDo3kgdOG7sSBtw6xuaCBsw6BtIGzhuqV5CgpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCmBgYAoKIyBHaeG7m2kgdGhp4buHdQoKS2jDtG5nIGPDsyBwYWNrYWdlIG7DoG8gbMOgbSBjaG8gdMO0aSDEkeG7jWMgbmhp4buBdSBuaMawIHB1cnJyLiBWw6wgbsOzIHF1w6EgaGF5LCBxdcOhIHRow7RuZyBtaW5oCgojIGFzX21hcHBlcigpIAoKQ2h1eeG7g24gMSBvYmplY3QgdGjDoG5oIDEgbWFwcGVyIGZ1bmN0aW9uCgotIEzDoCAxIGPDoWNoIHZp4bq/dCBraMOhYyBj4bunYSBow6BtCmBgYHtyfQpmX2FkZCA8LSBmdW5jdGlvbih4LCB5KXt4K3l9CgphZGRpdGlvbiA8LSBhc19tYXBwZXIofiAueCArIC55KQphZGRpdGlvbgoKZl9hZGQoMyw0KQphZGRpdGlvbigzLCA0KQoKbWFwMl9pbnQoMTozLCA0OjYsIGFkZGl0aW9uKQptYXAyX2ludCgxOjMsIDQ6NiwgZl9hZGQpCgphc19tYXBwZXIoZl9hZGQpCmBgYAoKLSDEkMahbiBnaeG6o24gaMahbiBraGkgY2h1eeG7g24gxJHhu5VpIDEgb2JqZWN0IHRow6BuaCAxIGjDoG0KYGBge3J9CmdldF9yc3F1YXJlZCA8LSBjb21wb3NlKGFzX21hcHBlcigici5zcXVhcmVkIiksIHN1bW1hcnksIGxtKQoKZ2V0X3JzcXVhcmVkKFNlcGFsLkxlbmd0aCB+IFNlcGFsLldpZHRoLCBkYXRhID0gaXJpcykKYGBgCgpgYGB7cn0KbCA8LSBsaXN0KHJub3JtKDEwKSwKICAgICAgICAgIHJub3JtKDEwMCksIAogICAgICAgICAgcm5vcm0oMTAwMCkpCnBtYXBfZGJsKGxpc3QobCwgMC4xLCBUUlVFKSwgfiBtZWFuKC4uMSwgLi4yLCAuLjMpKSAKYGBgCgojIHJlcnVuKCkKCkNo4bqheSAxIGjDoG0gduG7m2kgbiBs4bqnbiAKCmBgYHtyfQp0cmFpbiA8LSByZXJ1bigyMCwgc2FtcGxlX2ZyYWMoYWlycXVhbGl0eSwgc2l6ZSA9IDAuOCkpCnZhbGlkYXRpb24gPC0gbWFwKHRyYWluLCB+IGFudGlfam9pbihhaXJxdWFsaXR5LCAueCkpCmBgYAoKS2nhu4NtIHRyYSBxdXkgbcO0IG3huqt1IHZhbGlkYXRpb24KYGBge3J9Cm1hcF9pbnQodmFsaWRhdGlvbiwgbnJvdykgJT4lIGV2ZXJ5KH4ueCA9PSAzMSkKYGBgCgoKIyBtYXBfaWYoKSAKCkNo4buJIMOhcCBk4bulbmcga2hpIHRo4buPYSBtw6NuIMSRaeG7gXUg
a2nhu4duIGPhu6dhIGjDoG0gaWYsIGdp4buvIG5ndXnDqm4gxJHhuqd1IHbDoG8gbuG6v3Uga2jDtG5nIHRo4buPYSBtw6NuIMSRaeG7gXUga2nhu4duIGlmCgpLaeG7g20gxJHhu4tuaCB4ZW0gMSBiaeG6v24gY8OzIHBow6JuIHBo4buRaSBjaHXhuqluIGhheSBraMO0bmcgCmBgYHtyfQptYXBfaWYoaXJpcywgaXMubnVtZXJpYywgc2hhcGlyby50ZXN0KSAKYGBgCgojIFBow6JuIHTDrWNoIHTGsMahbmcgcXVhbiAKClThu5UgaOG7o3AgMiB0w6puIGJp4bq/bgoKYGBge3J9CmxpYnJhcnkodGlkeXN0cmluZ2Rpc3QpICMgV29ya3Mgc2luY2UgdjAuMS4yIApjb21iIDwtIHRpZHlfY29tYl9hbGwobmFtZXMoYWlycXVhbGl0eSkpCmtuaXRyOjprYWJsZShjb21iKQpgYGAKCkLhuqNuZyBjw6FjIGjhu4cgc+G7kSB0xrDGoW5nIHF1YW4gdsOgIGtp4buDbSDEkeG7i25oIMO9IG5naMSpYSBj4bunYSBjw6FjIGjhu4cgc+G7kSAKYGBge3J9CmJ1bGtfY29yIDwtIHBtYXAoY29tYiwgfiBjb3IudGVzdChhaXJxdWFsaXR5W1sueF1dLCBhaXJxdWFsaXR5W1sueV1dKSkgJT4lIAogIG1hcF9kZihicm9vbTo6dGlkeSkgJT4lIAogIGNiaW5kKGNvbWIsIC4pCgprbml0cjo6a2FibGUoYnVsa19jb3IsIGRpZ2l0cyA9IDMpCmBgYAoK