library(plyr)
## Warning: package 'plyr' was built under R version 4.1.2
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.3 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## Warning: package 'readr' was built under R version 4.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::arrange() masks plyr::arrange()
## x purrr::compact() masks plyr::compact()
## x dplyr::count() masks plyr::count()
## x dplyr::failwith() masks plyr::failwith()
## x dplyr::filter() masks stats::filter()
## x dplyr::id() masks plyr::id()
## x dplyr::lag() masks stats::lag()
## x dplyr::mutate() masks plyr::mutate()
## x dplyr::rename() masks plyr::rename()
## x dplyr::summarise() masks plyr::summarise()
## x dplyr::summarize() masks plyr::summarize()
# List in, list out: apply class() to every column of iris.
llply(.data = iris, .fun = class)
## $Sepal.Length
## [1] "numeric"
##
## $Sepal.Width
## [1] "numeric"
##
## $Petal.Length
## [1] "numeric"
##
## $Petal.Width
## [1] "numeric"
##
## $Species
## [1] "factor"
# List in, data frame out: one row per column of iris, giving its class.
ldply(.data = iris, .fun = class)
## .id V1
## 1 Sepal.Length numeric
## 2 Sepal.Width numeric
## 3 Petal.Length numeric
## 4 Petal.Width numeric
## 5 Species factor
# List in, array out: the column classes of iris as a character vector.
laply(.data = iris, .fun = class)
## [1] "numeric" "numeric" "numeric" "numeric" "factor"
将 data frame 按某个变量拆分成若干份，然后对每一份分别调用函数；可以理解为按行进行拆分。
# Fit a linear trend of rbi against elapsed years for one subset of data.
#
# df: a data frame with numeric `year` and `rbi` columns.
# Returns the `lm` fit of rbi ~ year, where year is first shifted so that the
# earliest year in `df` becomes 0; the intercept is therefore the fitted rbi
# in the first year of the subset.
linmod <- function(df) {
  lm(
    formula = rbi ~ year,
    data = mutate(df, year = year - min(year))
  )
}
# Data frame in, list out: split baseball by player id and fit linmod() to
# each player's rows.
models <- dlply(.data = baseball, .variables = .(id), .fun = linmod)
# Summarize a dataset by two variables
# Keep all 29 rows in dfx: piping the data frame itself through head() would
# leave only the first six rows (all group 'A'), so groups 'B' and 'C' would
# never reach ddply().
# sex and age are random draws (no seed is set), so results differ per run.
dfx <- data.frame(
  group = c(rep('A', 8), rep('B', 15), rep('C', 6)),
  sex = sample(c("M", "F"), size = 29, replace = TRUE),
  age = runif(n = 29, min = 18, max = 54)
)
# Note the use of the '.' function to allow
# group and sex to be used without quoting
ddply(dfx, .(group, sex), summarize,
      mean = round(mean(age), 2),
      sd = round(sd(age), 2)) %>% head()
## group sex mean sd
## (rows omitted: sex and age are drawn at random, so the values change on
## every run; expect one row per observed group/sex combination)
# An example using a formula for .variables:
# count the rows per year within the first 100 rows of baseball.
head(ddply(.data = baseball[1:100, ], .variables = ~ year, .fun = nrow))
## year V1
## 1 1871 7
## 2 1872 13
## 3 1873 13
## 4 1874 15
## 5 1875 17
## 6 1876 15
# Applying two functions; nrow and ncol
# A character vector of function names yields one output column per function.
head(ddply(.data = baseball, .variables = .(lg), .fun = c("nrow", "ncol")))
## lg nrow ncol
## 1 65 22
## 2 AA 171 22
## 3 AL 10007 22
## 4 FL 37 22
## 5 NL 11378 22
## 6 PL 32 22
# Calculate mean runs batted in for each year
# Keep every year in rbi (no head() on the stored result) so that the chart
# below covers the full range of years rather than only the first six.
rbi <- ddply(baseball, .(year), summarise,
             mean_rbi = mean(rbi, na.rm = TRUE))
# Plot a line chart of the result
plot(mean_rbi ~ year, type = "l", data = rbi)
# make new variable career_year based on the
# start year for each player (id)
# The full result is stored; use head(base2) to preview it rather than
# truncating the stored data frame to its first six rows.
base2 <- ddply(baseball, .(id), mutate,
               career_year = year - min(year) + 1)
# Data frame in, array out: number of rows per year, showing the first six.
head(daply(.data = baseball, .variables = .(year), .fun = nrow))
## 1871 1872 1873 1874 1875 1876
## 7 13 13 15 17 15
# Per-year column means, variant 1: pass only column 2 (year, used for the
# split) plus columns 6:9 (g, ab, r, h in the output below), then apply
# colwise(mean) to each year's piece. head() keeps the first six years.
daply(baseball[, c(2, 6:9)], .(year), colwise(mean)) %>% head()
##
## year g ab r h
## 1871 28 135.8571 33.57143 42.14286
## 1872 29.46154 140.8462 32.15385 42.92308
## 1873 46.30769 217.6154 48.46154 68.53846
## 1874 49 226.7333 44 64.86667
## 1875 57.82353 256.9412 47.17647 73.29412
## 1876 58.66667 258.4 43.26667 72.4
# Per-year column means, variant 2: the data passed in holds only columns 6:9,
# and the grouping variable is supplied separately as baseball$year, which is
# why the output dimension below is labelled `baseball$year`.
daply(baseball[, 6:9], .(baseball$year), colwise(mean)) %>% head()
##
## baseball$year g ab r h
## 1871 28 135.8571 33.57143 42.14286
## 1872 29.46154 140.8462 32.15385 42.92308
## 1873 46.30769 217.6154 48.46154 68.53846
## 1874 49 226.7333 44 64.86667
## 1875 57.82353 256.9412 47.17647 73.29412
## 1876 58.66667 258.4 43.26667 72.4
# Per-year column means, variant 3: split the full data frame by year and
# select columns 6:9 inside the function applied to each piece.
daply(baseball, .(year), function(df) colwise(mean)(df[, 6:9])) %>% head()
##
## year g ab r h
## 1871 28 135.8571 33.57143 42.14286
## 1872 29.46154 140.8462 32.15385 42.92308
## 1873 46.30769 217.6154 48.46154 68.53846
## 1874 49 226.7333 44 64.86667
## 1875 57.82353 256.9412 47.17647 73.29412
## 1876 58.66667 258.4 43.26667 72.4
# Array in, list out: quantiles of each slice along the third margin of
# ozone, showing the first six list elements.
head(alply(.data = ozone, .margins = 3, .fun = quantile))
## $`1`
## 0% 25% 50% 75% 100%
## 242 248 252 260 312
##
## $`2`
## 0% 25% 50% 75% 100%
## 238 248 252 258 334
##
## $`3`
## 0% 25% 50% 75% 100%
## 240.0 250.0 254.0 260.5 338.0
##
## $`4`
## 0% 25% 50% 75% 100%
## 242 248 252 258 334
##
## $`5`
## 0% 25% 50% 75% 100%
## 242 250 256 266 348
##
## $`6`
## 0% 25% 50% 75% 100%
## 246 256 260 278 330
# Array in, list out: frequency table of the rounded values in each slice
# along the third margin of ozone, showing the first six list elements.
tabulate_rounded <- function(slice) table(round(slice))
head(alply(.data = ozone, .margins = 3, .fun = tabulate_rounded))
## $`1`
##
## 242 244 246 248 250 252 254 256 258 260 262 264 266 268 270 272 274 276 278 280
## 7 21 65 89 66 55 50 35 37 16 11 10 8 4 9 6 6 3 1 5
## 282 284 286 288 290 292 294 296 298 300 302 304 306 308 310 312
## 3 4 5 6 3 2 8 2 10 3 2 3 6 3 9 3
##
## $`2`
##
## 238 240 242 244 246 248 250 252 254 256 258 260 262 264 266 268 270 272 274 276
## 1 4 15 28 81 74 68 72 54 31 19 9 6 4 7 11 2 4 6 5
## 278 280 282 284 286 288 290 292 294 296 298 302 304 306 308 310 312 314 316 320
## 4 2 2 8 4 11 5 6 1 2 3 4 1 1 2 2 4 2 2 1
## 322 324 328 330 332 334
## 1 2 1 2 1 1
##
## $`3`
##
## 240 242 244 246 248 250 252 254 256 258 260 262 264 266 268 270 272 274 276 278
## 1 1 1 22 64 78 67 73 56 51 18 11 11 2 2 7 11 4 2 5
## 280 282 284 286 288 290 292 294 296 298 300 302 304 306 310 312 314 322 324 326
## 5 4 9 4 8 6 1 7 4 3 8 1 3 2 2 2 6 1 1 4
## 328 330 338
## 1 3 4
##
## $`4`
##
## 242 244 246 248 250 252 254 256 258 260 262 264 266 268 270 272 274 276 278 280
## 18 47 67 62 81 89 53 12 9 4 6 10 7 2 4 7 5 6 2 3
## 282 284 286 288 290 292 294 296 298 300 302 306 308 310 312 314 316 318 320 322
## 6 3 8 3 1 3 3 1 3 3 7 9 3 1 3 1 2 2 4 7
## 324 326 328 330 332 334
## 1 1 3 1 1 2
##
## $`5`
##
## 242 244 246 248 250 252 254 256 258 260 262 264 266 268 270 272 274 276 278 280
## 12 33 45 34 50 36 57 41 36 27 13 26 23 3 7 9 6 8 3 10
## 282 284 286 288 290 292 294 296 298 300 302 304 306 308 310 312 314 316 318 320
## 4 5 11 4 5 2 9 5 3 3 6 6 2 8 2 2 1 3 3 3
## 326 330 332 344 348
## 4 1 2 1 2
##
## $`6`
##
## 246 248 250 252 254 256 258 260 262 264 266 268 270 272 274 276 278 280 282 284
## 1 2 3 12 69 70 75 67 38 17 15 18 13 11 11 6 7 8 13 9
## 286 288 290 292 294 296 298 300 302 304 306 308 310 312 314 316 318 320 322 324
## 8 5 10 4 7 2 12 8 6 9 2 12 4 2 2 4 4 3 1 1
## 326 328 330
## 1 1 3
# Array in, data frame out: one row of quantiles per slice along the third
# margin of ozone (labelled `time` in the output), showing the first six rows.
head(adply(.data = ozone, .margins = 3, .fun = quantile))
## time 0% 25% 50% 75% 100%
## 1 1 242 248 252 260.0 312
## 2 2 238 248 252 258.0 334
## 3 3 240 250 254 260.5 338
## 4 4 242 248 252 258.0 334
## 5 5 242 250 256 266.0 348
## 6 6 246 256 260 278.0 330
# Array in, array out: the same per-slice quantiles returned as a matrix,
# showing the first six rows.
head(aaply(.data = ozone, .margins = 3, .fun = quantile))
##
## time 0% 25% 50% 75% 100%
## 1 242 248 252 260.0 312
## 2 238 248 252 258.0 334
## 3 240 250 254 260.5 338
## 4 242 248 252 258.0 334
## 5 242 250 256 266.0 348
## 6 246 256 260 278.0 330