plyr notes: quoting variables with .() and split-apply-combine with ddply()

xyply: 함수 이름의 첫 글자 x는 입력, 둘째 글자 y는 출력의 자료형을 뜻한다 (a = array, d = data.frame, l = list)

array
data.frame
list
adply, alply, ddply 등

?.

Quote variables to create a list of unevaluated expressions for later evaluation.

Description: This function is similar to ~ in that it is used to capture the name of variables, not their current value. This is used throughout plyr to specify the names of variables (or more complicated expressions).

Similar tricks can be performed with substitute, but when functions can be called in multiple ways it becomes increasingly tricky to ensure that the values are extracted from the correct frame. Substitute tricks also make it difficult to program against the functions that use them, while the quoted class provides as.quoted.character to convert strings to the appropriate data structure.


# Load plyr, which provides .(), as.quoted() and ddply() used below.
library(plyr)
# Start from an empty global environment. The argument is spelled out in full
# (all.names) rather than relying on partial matching of `all`; with
# all.names = TRUE, objects whose names begin with a dot are removed as well.
rm(list = ls(all.names = TRUE))
# Capture the bare names a, b and c without evaluating them.
.(a, b, c)

## List of 3
##  $ a: symbol a
##  $ b: symbol b
##  $ c: symbol c
##  - attr(*, "env")=<environment: R_GlobalEnv> 
##  - attr(*, "class")= chr "quoted"

# Named arguments become the names of the quoted list; the values stay
# unevaluated symbols.
.(first = a, second = b, third = c)

## List of 3
##  $ first : symbol a
##  $ second: symbol b
##  $ third : symbol c
##  - attr(*, "env")=<environment: R_GlobalEnv> 
##  - attr(*, "class")= chr "quoted"

# Whole expressions can be quoted too; each one is stored as an unevaluated
# call and labelled with its own text.
.(a^2, b - d, log(c))

## List of 3
##  $ a^2   : language a^2
##  $ b - d : language b - d
##  $ log(c): language log(c)
##  - attr(*, "env")=<environment: R_GlobalEnv> 
##  - attr(*, "class")= chr "quoted"

# A one-sided formula is converted to a quoted list with one entry per term.
as.quoted(~a + b + c)

## List of 3
##  $ a: symbol a
##  $ b: symbol b
##  $ c: symbol c
##  - attr(*, "env")=<environment: R_GlobalEnv> 
##  - attr(*, "class")= chr "quoted"

# A two-sided formula works as well: the variables on the left- and right-hand
# side all become entries of the quoted list.
as.quoted(a ~ b + c)

## List of 3
##  $ a: symbol a
##  $ b: symbol b
##  $ c: symbol c
##  - attr(*, "env")=<environment: R_GlobalEnv> 
##  - attr(*, "class")= chr "quoted"

# A character vector of names is converted to quoted symbols.
as.quoted(c("a", "b", "c"))

## List of 3
##  $ a: symbol a
##  $ b: symbol b
##  $ c: symbol c
##  - attr(*, "env")=<environment: R_GlobalEnv> 
##  - attr(*, "class")= chr "quoted"




# Some examples using ddply - look at the column names

# Grouping variable given as a character string; each(nrow, ncol) applies both
# functions to every subset and returns one column per function.
ddply(mtcars, "cyl", each(nrow, ncol))

##   cyl nrow ncol
## 1   4   11   11
## 2   6    7   11
## 3   8   14   11

# Same grouping by cyl, written as a one-sided formula.
ddply(mtcars, ~cyl, each(nrow, ncol))

##   cyl nrow ncol
## 1   4   11   11
## 2   6    7   11
## 3   8   14   11

# Same grouping by cyl, written with .() so the column name needs no quotes.
ddply(mtcars, .(cyl), each(nrow, ncol))

##   cyl nrow ncol
## 1   4   11   11
## 2   6    7   11
## 3   8   14   11

# Group by a computed expression; the grouping column of the result is
# labelled with the expression text.
ddply(mtcars, .(log(cyl)), each(nrow, ncol))

##   log(cyl) nrow ncol
## 1    1.386   11   11
## 2    1.792    7   11
## 3    2.079   14   11

# Naming the expression inside .() sets the label of the grouping column.
ddply(mtcars, .(logcyl = log(cyl)), each(nrow, ncol))

##   logcyl nrow ncol
## 1  1.386   11   11
## 2  1.792    7   11
## 3  2.079   14   11

# Group by the sum of two columns; the grouping column is again labelled with
# the expression text.
ddply(mtcars, .(vs + am), each(nrow, ncol))

##   vs + am nrow ncol
## 1       0   12   11
## 2       1   13   11
## 3       2    7   11

# Same grouping by vs + am, with an explicit name for the grouping column.
ddply(mtcars, .(vsam = vs + am), each(nrow, ncol))

##   vsam nrow ncol
## 1    0   12   11
## 2    1   13   11
## 3    2    7   11

?ddply {plyr}

Split data frame, apply function, and return results in a data frame.

Description: For each subset of a data frame, apply function then combine results into a data frame.


# Summarize a dataset by two variables

# Clear the global environment, including objects whose names begin with a
# dot; the argument name is written out in full (all.names).
rm(list = ls(all.names = TRUE))

# library() stops with an error if plyr is not installed, whereas require()
# would only return FALSE and let the script fail later at the first ddply().
library(plyr)

# Example data: 29 rows in three groups (8 A, 15 B, 6 C), each with a randomly
# drawn sex and an age drawn uniformly between 18 and 54. No seed is set here,
# so the values (and the summary computed from them) differ between runs.
dfx <- data.frame(
  group = c(rep("A", 8), rep("B", 15), rep("C", 6)),
  sex = sample(c("M", "F"), size = 29, replace = TRUE),
  age = runif(n = 29, min = 18, max = 54)
)

# `.()` lets the grouping columns group and sex be written as bare names,
# without quoting. For every group/sex combination, report the mean and the
# standard deviation of age, each rounded to two decimals.

ddply(
  dfx,
  .(group, sex),
  summarize,
  mean = round(mean(age), digits = 2),
  sd = round(sd(age), digits = 2)
)

##   group sex  mean    sd
## 1     A   F 41.54  8.93
## 2     A   M 33.00 15.98
## 3     B   F 31.29 12.31
## 4     B   M 32.80 11.93
## 5     C   F 39.26 10.97


# An example using a formula for .variables
# Only the first 100 rows of baseball are used; nrow counts the rows per year,
# and the unnamed result column is shown as V1.

ddply(baseball[1:100, ], ~year, nrow)

##   year V1
## 1 1871  7
## 2 1872 13
## 3 1873 13
## 4 1874 15
## 5 1875 17
## 6 1876 15
## 7 1877 17
## 8 1878  3


# Applying two functions; nrow and ncol
# The functions are given as a character vector of names; each name becomes a
# column of the result.

ddply(baseball, .(lg), c("nrow", "ncol"))

##   lg  nrow ncol
## 1       65   22
## 2 AA   171   22
## 3 AL 10007   22
## 4 FL    37   22
## 5 NL 11378   22
## 6 PL    32   22
## 7 UA     9   22


# Calculate mean runs batted in for each year

# Inside summarise(), `rbi` is evaluated on each per-year subset of baseball,
# so it presumably refers to the data's rbi column; the summary is then stored
# in a variable that is also called rbi. Missing values are dropped from the
# mean via na.rm = TRUE.
rbi <- ddply(baseball, .(year), summarise, mean_rbi = mean(rbi, na.rm = TRUE))

# Plot a line chart of the result

# type = "l" draws a line; the columns come from the rbi summary built above.
plot(mean_rbi ~ year, type = "l", data = rbi)

plot of chunk unnamed-chunk-2


# make new variable career_year based on the start year for each player (id)
# (within each id, the earliest year gets career_year 1 because of the + 1)

base2 <- ddply(baseball, .(id), mutate, career_year = year - min(year) + 1)