My personal Reshape cheat sheet/ intro. I have taken some examples from the reshape2 paper and great explaination by Sean Andersen.
Reshape uses the concept of the long data format which can then be reshaped, aggregated and summarized as the user wants.
The wide format which we are used to manipulating data in a spreadsheet.
Notice that we have two kinds of information:
1) identifiers. In this case the day and month. 2) values. The measurements. In this case: ozone, solar.r, wind and temp.
library(reshape2)
library(dplyr)
names(airquality) <- tolower(names(airquality))
head(airquality)
## ozone solar.r wind temp month day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
The meltfunction converts the data to the long format which has the structure
id variable value
aqm <- melt(airquality, id = c("month", "day"), na.rm = TRUE)
head(aqm)
## month day variable value
## 1 5 1 ozone 41
## 2 5 2 ozone 36
## 3 5 3 ozone 12
## 4 5 4 ozone 18
## 6 5 6 ozone 28
## 7 5 7 ozone 23
The default column headings of variable value can be defined.
head(melt(airquality, id.vars = c("month", "day"), variable.name = "parameter",
value.name = "number"))
## month day parameter number
## 1 5 1 ozone 41
## 2 5 2 ozone 36
## 3 5 3 ozone 12
## 4 5 4 ozone 18
## 5 5 5 ozone NA
## 6 5 6 ozone 28
dcast(aqm, month ~ variable, mean)
## month ozone solar.r wind temp
## 1 5 23.62 181.3 11.623 65.55
## 2 6 29.44 190.2 10.267 79.10
## 3 7 59.12 216.5 8.942 83.90
## 4 8 59.96 171.9 8.794 83.97
## 5 9 31.45 167.4 10.180 76.90
dcast(aqm, month ~ variable, mean, margins = TRUE)
## month ozone solar.r wind temp (all)
## 1 5 23.62 181.3 11.623 65.55 68.71
## 2 6 29.44 190.2 10.267 79.10 87.38
## 3 7 59.12 216.5 8.942 83.90 93.50
## 4 8 59.96 171.9 8.794 83.97 79.71
## 5 9 31.45 167.4 10.180 76.90 71.83
## 6 (all) 42.13 185.9 9.958 77.88 80.06
library(plyr) # needed to access . function
dcast(aqm, variable ~ month, mean, subset = .(variable == "ozone"))
## variable 5 6 7 8 9
## 1 ozone 23.62 29.44 59.12 59.96 31.45
dcast(aqm, variable ~ month, mean, subset = .(month == 5))
## variable 5
## 1 ozone 23.62
## 2 solar.r 181.30
## 3 wind 11.62
## 4 temp 65.55
mtcars_sub <- mtcars[1:5, c("mpg", "disp")]
mtcars_sub
## mpg disp
## Mazda RX4 21.0 160
## Mazda RX4 Wag 21.0 160
## Datsun 710 22.8 108
## Hornet 4 Drive 21.4 258
## Hornet Sportabout 18.7 360
mtcars_sub$car <- rownames(mtcars_sub)
rownames(mtcars_sub) <- 1:5
dcast(melt(mtcars_sub), variable ~ car)
## Using car as id variables
## variable Datsun 710 Hornet 4 Drive Hornet Sportabout Mazda RX4
## 1 mpg 22.8 21.4 18.7 21
## 2 disp 108.0 258.0 360.0 160
## Mazda RX4 Wag
## 1 21
## 2 160
#Chick weight example
names(ChickWeight) <- tolower(names(ChickWeight))
chick_m <- melt(ChickWeight, id = 2:4, na.rm = TRUE)
dcast(chick_m, time ~ variable, mean) # average effect of time
## time weight
## 1 0 41.06
## 2 2 49.22
## 3 4 59.96
## 4 6 74.31
## 5 8 91.24
## 6 10 107.84
## 7 12 129.24
## 8 14 143.81
## 9 16 168.09
## 10 18 190.19
## 11 20 209.72
## 12 21 218.69
dcast(chick_m, diet ~ variable, mean) # average effect of diet
## diet weight
## 1 1 102.6
## 2 2 122.6
## 3 3 142.9
## 4 4 135.3
acast(chick_m, diet ~ time, mean) # average effect of diet & time
## 0 2 4 6 8 10 12 14 16 18 20 21
## 1 41.4 47.25 56.47 66.79 79.68 93.05 108.5 123.4 144.6 158.9 170.4 177.8
## 2 40.7 49.40 59.80 75.40 91.70 108.50 131.3 141.9 164.7 187.7 205.6 214.7
## 3 40.8 50.40 62.20 77.90 98.40 117.10 144.4 164.5 197.4 233.1 258.9 270.3
## 4 41.0 51.80 64.50 83.90 105.60 126.00 151.4 161.8 182.0 202.9 233.9 238.6