Spread_Gather

library(dplyr)

## 
## 载入程辑包：'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
# Demo data: one row per day for four consecutive days starting 2009-01-01,
# with three random series X, Y and Z drawn from normal distributions with
# mean 0 and standard deviations 1, 2 and 4.
# No seed is set, so the drawn values differ on every run.
stocks <- data.frame(
  time = as.Date("2009-01-01") + 0:3,
  X = rnorm(n = 4, mean = 0, sd = 1),
  Y = rnorm(n = 4, mean = 0, sd = 2),
  Z = rnorm(n = 4, mean = 0, sd = 4)
)
# Add a per-row letter tag "A".."D".
# NOTE(review): "laebl" is presumably a typo for "label"; the name is kept
# unchanged because other parts of this document refer to it.
stocks[["laebl"]] <- LETTERS[1:4]
stocks

##         time          X          Y          Z laebl
## 1 2009-01-01 -0.6907200 -0.6526297  0.2571971     A
## 2 2009-01-02 -0.3985508  1.0834226  1.8481896     B
## 3 2009-01-03  0.4187572 -0.4049702 -0.3250516     C
## 4 2009-01-04 -1.4367165 -3.8984515 -3.4233034     D

# Reshape wide -> long: stack the X, Y and Z columns into key/value pairs,
# where `stock` holds the former column name and `price` holds its value.
# The identifier columns are excluded by name rather than by position
# (previously -c(1, 5)), so the call keeps selecting the right columns even
# if `stocks` is reordered or gains a column.
stocksm <- stocks %>%
  gather(stock, price, -c(time, laebl))
stocksm

##          time laebl stock      price
## 1  2009-01-01     A     X -0.6907200
## 2  2009-01-02     B     X -0.3985508
## 3  2009-01-03     C     X  0.4187572
## 4  2009-01-04     D     X -1.4367165
## 5  2009-01-01     A     Y -0.6526297
## 6  2009-01-02     B     Y  1.0834226
## 7  2009-01-03     C     Y -0.4049702
## 8  2009-01-04     D     Y -3.8984515
## 9  2009-01-01     A     Z  0.2571971
## 10 2009-01-02     B     Z  1.8481896
## 11 2009-01-03     C     Z -0.3250516
## 12 2009-01-04     D     Z -3.4233034

# Reshape long -> wide again: one column per distinct `stock`, filled with
# the matching `price`; this recovers the X / Y / Z layout.
spread(stocksm, key = stock, value = price)

##         time laebl          X          Y          Z
## 1 2009-01-01     A -0.6907200 -0.6526297  0.2571971
## 2 2009-01-02     B -0.3985508  1.0834226  1.8481896
## 3 2009-01-03     C  0.4187572 -0.4049702 -0.3250516
## 4 2009-01-04     D -1.4367165 -3.8984515 -3.4233034

# spread() and gather() are complements: gathering a spread table gets the
# original rows back. Start from a tiny two-row example.
df <- data.frame(
  x = c("a", "b"),
  y = c(3, 4),
  z = c(5, 6)
)
df

##   x y z
## 1 a 3 5
## 2 b 4 6

# Spread the values of `y` into one column per level of `x`; combinations
# that do not occur in the data are filled with NA.
spread(df, key = x, value = y)

##   z  a  b
## 1 5  3 NA
## 2 6 NA  4

# Round trip: spread, then gather columns a..b back into x / y.
# na.rm = TRUE drops the NA cells that spread() introduced.
df %>%
  spread(key = x, value = y) %>%
  gather(key = "x", value = "y", a:b, na.rm = TRUE)

##   z x y
## 1 5 a 3
## 4 6 b 4

# Use 'convert = TRUE' to produce variables of mixed type.
# `value` mixes numbers and strings in one c() call, so every element is
# stored as text.
df <- data.frame(
  row = rep(c(1, 51), each = 3),
  var = rep(c("Sepal.Length", "Species", "Species_num"), times = 2),
  value = c(5.1, "setosa", 1, 7.0, "versicolor", 2)
)
df

##   row          var      value
## 1   1 Sepal.Length        5.1
## 2   1      Species     setosa
## 3   1  Species_num          1
## 4  51 Sepal.Length          7
## 5  51      Species versicolor
## 6  51  Species_num          2

# Without conversion, each spread column keeps the type of `value`.
df %>%
  spread(key = var, value = value) %>%
  str()

## 'data.frame':    2 obs. of  4 variables:
##  $ row         : num  1 51
##  $ Sepal.Length: chr  "5.1" "7"
##  $ Species     : chr  "setosa" "versicolor"
##  $ Species_num : chr  "1" "2"

# With convert = TRUE each new column is re-typed individually, so the
# numeric-looking columns no longer stay character.
df %>%
  spread(key = var, value = value, convert = TRUE) %>%
  str()

## 'data.frame':    2 obs. of  4 variables:
##  $ row         : num  1 51
##  $ Sepal.Length: num  5.1 7
##  $ Species     : chr  "setosa" "versicolor"
##  $ Species_num : int  1 2

Spread_Gather_tidyr.R

liyix

2021-11-05