Reshaping data.table objects from long to wide

This example demonstrates merging two data.table objects, aggregating some of the data (which results in a long data.table), using expand.grid to create a “balanced” long data.table, and then “reshaping” the aggregated data.table to a wide format.

A <- structure(list(Ord_num_1 = c(100, 101), Ord_date_1 = c("1/1/2013", "1/15/2013"), 
    Part_num = c(1001, 1002), Cust_ID = c(1111, 1111)), .Names = c("Ord_num_1", 
    "Ord_date_1", "Part_num", "Cust_ID"), row.names = 1:2, class = "data.frame")

B <- structure(list(Ord_num_2 = c(200, 201, 202, 203), Ord_date_2 = c("2/1/2013", 
    "3/15/2013", "4/18/2013", "5/1/2013"), Part_num = c(2001, 2002, 2002, 2002), 
    Ord_num_1 = c(100, 100, 100, 101)), .Names = c("Ord_num_2", "Ord_date_2", 
    "Part_num", "Ord_num_1"), row.names = c(NA, 4L), class = "data.frame")

The data look like:

A
##   Ord_num_1 Ord_date_1 Part_num Cust_ID
## 1       100   1/1/2013     1001    1111
## 2       101  1/15/2013     1002    1111
B
##   Ord_num_2 Ord_date_2 Part_num Ord_num_1
## 1       200   2/1/2013     2001       100
## 2       201  3/15/2013     2002       100
## 3       202  4/18/2013     2002       100
## 4       203   5/1/2013     2002       101

Convert the data.frames to data.tables. Set a key on which to merge.

library(data.table)
DTA <- data.table(A, key = "Ord_num_1")
DTB <- data.table(B, key = "Ord_num_1")
DTM <- merge(DTA, DTB)
DTM
##    Ord_num_1 Ord_date_1 Part_num.x Cust_ID Ord_num_2 Ord_date_2 Part_num.y
## 1:       100   1/1/2013       1001    1111       200   2/1/2013       2001
## 2:       100   1/1/2013       1001    1111       201  3/15/2013       2002
## 3:       100   1/1/2013       1001    1111       202  4/18/2013       2002
## 4:       101  1/15/2013       1002    1111       203   5/1/2013       2002

Aggregate the data.

DTML <- DTM[, length(Part_num.x), by = c("Ord_num_1", "Ord_date_1", "Cust_ID", 
    "Part_num.y")]

DTML
##    Ord_num_1 Ord_date_1 Cust_ID Part_num.y V1
## 1:       100   1/1/2013    1111       2001  1
## 2:       100   1/1/2013    1111       2002  2
## 3:       101  1/15/2013    1111       2002  1

These data are “unbalanced”. Use expand.grid to make it balanced.

DTML <- DTML[expand.grid(lapply(DTML, unique)[setdiff(names(DTML), "V1")])]
DTML
##    Ord_num_1 Ord_date_1 Cust_ID Part_num.y V1
## 1:       100   1/1/2013    1111       2001  1
## 2:       101   1/1/2013    1111       2001 NA
## 3:       100  1/15/2013    1111       2001 NA
## 4:       101  1/15/2013    1111       2001 NA
## 5:       100   1/1/2013    1111       2002  2
## 6:       101   1/1/2013    1111       2002 NA
## 7:       100  1/15/2013    1111       2002 NA
## 8:       101  1/15/2013    1111       2002  1

Now, convert it from long to wide, and optionally, remove rows where both columns “2001” and “2002” are NA.

DTMW <- DTML[, as.list(setattr(V1, "names", Part_num.y)), by = eval(setdiff(names(DTML), 
    c("V1", "Part_num.y")))]

DTMW[rowSums(is.na(DTMW[, c("2001", "2002"), with = FALSE])) < 2, ]
##    Ord_num_1 Ord_date_1 Cust_ID 2001 2002
## 1:       100   1/1/2013    1111    1    2
## 2:       101  1/15/2013    1111   NA    1

References