Data manipulation with reshape2

My personal Reshape cheat sheet/ intro. I have taken some examples from the reshape2 paper and great explaination by Sean Andersen.

Reshaping data

Reshape uses the concept of the long data format which can then be reshaped, aggregated and summarized as the user wants.

Wide format

The wide format which we are used to manipulating data in a spreadsheet.

Notice that we have two kinds of information:

1) identifiers. In this case the day and month. 2) values. The measurements. In this case: ozone, solar.r, wind and temp.

library(reshape2)
library(dplyr)
names(airquality) <- tolower(names(airquality))
head(airquality)
##   ozone solar.r wind temp month day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6

Long format: melt function

The meltfunction converts the data to the long format which has the structure

id variable value

aqm <- melt(airquality, id = c("month", "day"), na.rm = TRUE)
head(aqm)
##   month day variable value
## 1     5   1    ozone    41
## 2     5   2    ozone    36
## 3     5   3    ozone    12
## 4     5   4    ozone    18
## 6     5   6    ozone    28
## 7     5   7    ozone    23

The default column headings of variable value can be defined.

head(melt(airquality, id.vars = c("month", "day"), variable.name = "parameter", 
    value.name = "number"))
##   month day parameter number
## 1     5   1     ozone     41
## 2     5   2     ozone     36
## 3     5   3     ozone     12
## 4     5   4     ozone     18
## 5     5   5     ozone     NA
## 6     5   6     ozone     28

Reshaping: cast

dcast(aqm, month ~ variable, mean)
##   month ozone solar.r   wind  temp
## 1     5 23.62   181.3 11.623 65.55
## 2     6 29.44   190.2 10.267 79.10
## 3     7 59.12   216.5  8.942 83.90
## 4     8 59.96   171.9  8.794 83.97
## 5     9 31.45   167.4 10.180 76.90
dcast(aqm, month ~ variable, mean, margins = TRUE)
##   month ozone solar.r   wind  temp (all)
## 1     5 23.62   181.3 11.623 65.55 68.71
## 2     6 29.44   190.2 10.267 79.10 87.38
## 3     7 59.12   216.5  8.942 83.90 93.50
## 4     8 59.96   171.9  8.794 83.97 79.71
## 5     9 31.45   167.4 10.180 76.90 71.83
## 6 (all) 42.13   185.9  9.958 77.88 80.06
library(plyr)  # needed to access . function
dcast(aqm, variable ~ month, mean, subset = .(variable == "ozone"))
##   variable     5     6     7     8     9
## 1    ozone 23.62 29.44 59.12 59.96 31.45
dcast(aqm, variable ~ month, mean, subset = .(month == 5))
##   variable      5
## 1    ozone  23.62
## 2  solar.r 181.30
## 3     wind  11.62
## 4     temp  65.55

TRANSPOSE a data.frame with Reshape

mtcars_sub <- mtcars[1:5, c("mpg", "disp")]
mtcars_sub
##                    mpg disp
## Mazda RX4         21.0  160
## Mazda RX4 Wag     21.0  160
## Datsun 710        22.8  108
## Hornet 4 Drive    21.4  258
## Hornet Sportabout 18.7  360
mtcars_sub$car <- rownames(mtcars_sub)
rownames(mtcars_sub) <- 1:5
dcast(melt(mtcars_sub), variable ~ car)
## Using car as id variables
##   variable Datsun 710 Hornet 4 Drive Hornet Sportabout Mazda RX4
## 1      mpg       22.8           21.4              18.7        21
## 2     disp      108.0          258.0             360.0       160
##   Mazda RX4 Wag
## 1            21
## 2           160

#Chick weight example

names(ChickWeight) <- tolower(names(ChickWeight))
chick_m <- melt(ChickWeight, id = 2:4, na.rm = TRUE)
dcast(chick_m, time ~ variable, mean)  # average effect of time
##    time weight
## 1     0  41.06
## 2     2  49.22
## 3     4  59.96
## 4     6  74.31
## 5     8  91.24
## 6    10 107.84
## 7    12 129.24
## 8    14 143.81
## 9    16 168.09
## 10   18 190.19
## 11   20 209.72
## 12   21 218.69
dcast(chick_m, diet ~ variable, mean)  # average effect of diet
##   diet weight
## 1    1  102.6
## 2    2  122.6
## 3    3  142.9
## 4    4  135.3
acast(chick_m, diet ~ time, mean)  # average effect of diet & time
##      0     2     4     6      8     10    12    14    16    18    20    21
## 1 41.4 47.25 56.47 66.79  79.68  93.05 108.5 123.4 144.6 158.9 170.4 177.8
## 2 40.7 49.40 59.80 75.40  91.70 108.50 131.3 141.9 164.7 187.7 205.6 214.7
## 3 40.8 50.40 62.20 77.90  98.40 117.10 144.4 164.5 197.4 233.1 258.9 270.3
## 4 41.0 51.80 64.50 83.90 105.60 126.00 151.4 161.8 182.0 202.9 233.9 238.6