library(dplyr)
##
## 载入程辑包:'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# Build a small wide-format example: one row per day, one column per stock.
# The prices are random draws and no seed is set, so the values recorded in
# the output comments below come from one particular run and will differ
# when the script is executed again.
stocks <- data.frame(
  time = as.Date("2009-01-01") + 0:3,
  X = rnorm(4, mean = 0, sd = 1),
  Y = rnorm(4, mean = 0, sd = 2),
  Z = rnorm(4, mean = 0, sd = 4)
)
# Attach a one-letter label (A-D) to each row.
stocks$label <- LETTERS[1:4]
stocks
## time X Y Z label
## 1 2009-01-01 -0.6907200 -0.6526297 0.2571971 A
## 2 2009-01-02 -0.3985508 1.0834226 1.8481896 B
## 3 2009-01-03 0.4187572 -0.4049702 -0.3250516 C
## 4 2009-01-04 -1.4367165 -3.8984515 -3.4233034 D
# Wide -> long: stack X, Y and Z into the key/value columns `stock`/`price`.
# The identifier columns are excluded by name rather than by position, so the
# call keeps selecting the right columns if the frame gains or reorders columns.
stocksm <- stocks %>% gather(stock, price, -c(time, label))
stocksm
## time label stock price
## 1 2009-01-01 A X -0.6907200
## 2 2009-01-02 B X -0.3985508
## 3 2009-01-03 C X 0.4187572
## 4 2009-01-04 D X -1.4367165
## 5 2009-01-01 A Y -0.6526297
## 6 2009-01-02 B Y 1.0834226
## 7 2009-01-03 C Y -0.4049702
## 8 2009-01-04 D Y -3.8984515
## 9 2009-01-01 A Z 0.2571971
## 10 2009-01-02 B Z 1.8481896
## 11 2009-01-03 C Z -0.3250516
## 12 2009-01-04 D Z -3.4233034
# Long -> wide: spreading `stock` back out yields one column per stock again;
# the identifier columns (time, label) now come first.
stocksm %>% spread(stock, price)
## time label X Y Z
## 1 2009-01-01 A -0.6907200 -0.6526297 0.2571971
## 2 2009-01-02 B -0.3985508 1.0834226 1.8481896
## 3 2009-01-03 C 0.4187572 -0.4049702 -0.3250516
## 4 2009-01-04 D -1.4367165 -3.8984515 -3.4233034
# spread() and gather() are complements. Start from a tiny frame in which
# `x` holds the future column names and `y` the values to distribute.
df <- data.frame(
  x = letters[1:2],
  y = c(3, 4),
  z = c(5, 6)
)
df
## x y z
## 1 a 3 5
## 2 b 4 6
# Spreading `x` into columns leaves NA wherever a z/x combination has no value.
spread(df, x, y)
## z a b
## 1 5 3 NA
## 2 6 NA 4
# Gathering columns a:b back, dropping those NAs, recovers the x/y pairs.
gather(spread(df, x, y), "x", "y", a:b, na.rm = TRUE)
## z x y
## 1 5 a 3
## 4 6 b 4
# Input for the 'convert = TRUE' demonstration: a long-format frame whose
# `value` column mixes numbers and text. Holding them in a single vector makes
# every entry a character string, so the strings are written out explicitly.
df <- data.frame(
  row = rep(c(1, 51), each = 3),
  var = rep(c("Sepal.Length", "Species", "Species_num"), times = 2),
  value = c("5.1", "setosa", "1", "7", "versicolor", "2")
)
df
## row var value
## 1 1 Sepal.Length 5.1
## 2 1 Species setosa
## 3 1 Species_num 1
## 4 51 Sepal.Length 7
## 5 51 Species versicolor
## 6 51 Species_num 2
# Without conversion, every spread column keeps the character type of `value`.
str(spread(df, var, value))
## 'data.frame': 2 obs. of 4 variables:
## $ row : num 1 51
## $ Sepal.Length: chr "5.1" "7"
## $ Species : chr "setosa" "versicolor"
## $ Species_num : chr "1" "2"
# With convert = TRUE each new column is re-typed on its own (num / chr / int).
str(spread(df, var, value, convert = TRUE))
## 'data.frame': 2 obs. of 4 variables:
## $ row : num 1 51
## $ Sepal.Length: num 5.1 7
## $ Species : chr "setosa" "versicolor"
## $ Species_num : int 1 2