Reshaping data

Sometimes we need to reorganise data from a long table to a wide table with the same information. For example, we want the following data.frame:

set.seed(123)
category <- factor(sample(letters[1:4], 20, replace = TRUE))
numbers1 <- rpois(20, lambda = 2)
numbers2 <- rpois(20, lambda = 5)
rawData <- data.frame(cat = category, n1 = numbers1, n2 = numbers2)
rawData
##    cat n1 n2
## 1    b  4  3
## 2    d  3  4
## 3    b  2  4
## 4    d  6  4
## 5    d  2  3
## 6    a  3  3
## 7    c  2  3
## 8    d  2  5
## 9    c  1  4
## 10   b  1  7
## 11   d  5  2
## 12   b  4  5
## 13   c  3  7
## 14   c  3  2
## 15   a  0  5
## 16   d  2  3
## 17   a  3  3
## 18   a  1  6
## 19   b  1  8
## 20   d  1  4

into the followig format:

##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14
## a  3  3  0  5  3  3  1  6 NA  NA  NA  NA  NA  NA
## b  4  3  2  4  1  7  4  5  1   8  NA  NA  NA  NA
## c  2  3  1  4  3  7  3  2 NA  NA  NA  NA  NA  NA
## d  3  4  6  4  2  3  2  5  5   2   2   3   1   4

To achive this, we can do the following:

Get the categories for the rows:

cats <- sort(unique(rawData$cat))

Get a list with the combined values:

listData <- lapply(cats, function(x) {
    as.vector(t(rawData[rawData$cat == x, 2:3]))
})
names(listData) <- cats
listData
## $a
## [1] 3 3 0 5 3 3 1 6
## 
## $b
##  [1] 4 3 2 4 1 7 4 5 1 8
## 
## $c
## [1] 2 3 1 4 3 7 3 2
## 
## $d
##  [1] 3 4 6 4 2 3 2 5 5 2 2 3 1 4

We can already get the values for each category, if we do not need a matrix or data.frame structure:

listData[["a"]]
## [1] 3 3 0 5 3 3 1 6

Otherwise we can create a matrix with the appropriate dimensions:

maxRows <- max(sapply(listData, length))
matData <- t(sapply(listData, function(x) {
    c(x, rep(NA, maxRows - length(x)))
}))

matData
##   [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## a    3    3    0    5    3    3    1    6   NA    NA    NA    NA    NA
## b    4    3    2    4    1    7    4    5    1     8    NA    NA    NA
## c    2    3    1    4    3    7    3    2   NA    NA    NA    NA    NA
## d    3    4    6    4    2    3    2    5    5     2     2     3     1
##   [,14]
## a    NA
## b    NA
## c    NA
## d     4

And eventually a data.frame:

dfData <- as.data.frame(matData)

dfData
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14
## a  3  3  0  5  3  3  1  6 NA  NA  NA  NA  NA  NA
## b  4  3  2  4  1  7  4  5  1   8  NA  NA  NA  NA
## c  2  3  1  4  3  7  3  2 NA  NA  NA  NA  NA  NA
## d  3  4  6  4  2  3  2  5  5   2   2   3   1   4

Trying to get the same result with reshape2 package:

library(reshape2)
m <- melt(rawData, "cat")
m$id <- ave(m$value, m$cat, FUN = seq_along)
dcast(m, cat ~ id)
##   cat 1 2 3 4 5 6 7 8  9 10 11 12 13 14
## 1   a 3 0 3 1 3 5 3 6 NA NA NA NA NA NA
## 2   b 4 2 1 4 1 3 4 7  5  8 NA NA NA NA
## 3   c 2 1 3 3 3 4 7 2 NA NA NA NA NA NA
## 4   d 3 6 2 2 5 2 1 4  4  3  5  2  3  4

However the order is not the same.