library(reshape2)
library(tidyr)
library(dplyr, warn.conflicts = FALSE)
library(knitr)

Air quality example

Raw data

names(airquality) <- tolower(names(airquality))
airquality %>% head() %>% kable(row.names = FALSE)
ozone solar.r wind temp month day
41 190 7.4 67 5 1
36 118 8.0 72 5 2
12 149 12.6 74 5 3
18 313 11.5 62 5 4
NA NA 14.3 56 5 5
28 NA 14.9 66 5 6
airquality %>% tail() %>% kable(row.names = FALSE)
ozone solar.r wind temp month day
14 20 16.6 63 9 25
30 193 6.9 70 9 26
NA 145 13.2 77 9 27
14 191 14.3 75 9 28
18 131 8.0 76 9 29
20 223 11.5 68 9 30

Monthly mean with reshape2

aqm <- airquality %>% 
    melt(id=c("month", "day"), na.rm=TRUE)
aqm %>% 
    dcast(month ~ variable, 
          mean, 
          margins = c("month", "variable")) %>%
    kable()
month ozone solar.r wind temp (all)
5 23.61538 181.2963 11.622581 65.54839 68.70696
6 29.44444 190.1667 10.266667 79.10000 87.38384
7 59.11538 216.4839 8.941935 83.90323 93.49748
8 59.96154 171.8571 8.793548 83.96774 79.71207
9 31.44828 167.4333 10.180000 76.90000 71.82689
(all) 42.12931 185.9315 9.957516 77.88235 80.05722

Monthly mean with tidyr and dplyr

aqg <- airquality %>% 
    gather(variable, value, -month, -day, na.rm = TRUE) 
aqg %>% 
    group_by(month, variable) %>%
    summarise(value = mean(value)) %>%
    spread(variable, value) %>%
    kable()
month ozone solar.r wind temp
5 23.61538 181.2963 11.622581 65.54839
6 29.44444 190.1667 10.266667 79.10000
7 59.11538 216.4839 8.941935 83.90323
8 59.96154 171.8571 8.793548 83.96774
9 31.44828 167.4333 10.180000 76.90000

French Fries example

Raw data

output of help(french_fries)

This data was collected from a sensory experiment conducted at Iowa State University in 2004. The investigators were interested in the effect of using three different fryer oils had on the taste of the fries.

Variables:

  • time in weeks from start of study.
  • treatment (type of oil),
  • subject,
  • replicate,
  • potato-y flavour,
  • buttery flavour,
  • grassy flavour,
  • rancid flavour,
  • painty flavour
french_fries %>% head() %>% kable(row.names=FALSE)
time treatment subject rep potato buttery grassy rancid painty
1 1 3 1 2.9 0.0 0.0 0.0 5.5
1 1 3 2 14.0 0.0 0.0 1.1 0.0
1 1 10 1 11.0 6.4 0.0 0.0 0.0
1 1 10 2 9.9 5.9 2.9 2.2 0.0
1 1 15 1 1.2 0.1 0.0 1.1 5.1
1 1 15 2 8.8 3.0 3.6 1.5 2.3
french_fries %>% tail() %>% kable(row.names=FALSE)
time treatment subject rep potato buttery grassy rancid painty
10 3 63 1 7.0 0 0.0 11.9 3.3
10 3 63 2 7.0 0 0.0 11.4 7.3
10 3 78 1 3.7 0 0.9 0.0 1.3
10 3 78 2 3.3 0 0.0 2.5 1.4
10 3 86 1 2.5 0 0.0 7.0 10.5
10 3 86 2 2.5 0 0.0 8.2 9.4

treatment + subject ~ flavour with reshape2

ff_d <- french_fries %>% 
    melt(id=1:4, na.rm=TRUE)
ff_d %>% 
    dcast(treatment + subject ~ variable, 
          mean, 
          margins="treatment") %>%
    kable()
treatment subject potato buttery grassy rancid painty
1 3 6.216667 0.3722222 0.1888889 2.1055556 3.1111111
1 10 9.955000 6.7500000 0.5850000 4.0200000 1.3750000
1 15 3.360000 0.7200000 0.4200000 3.9650000 3.2600000
1 16 6.495000 3.2600000 0.7550000 4.1200000 1.2300000
1 19 9.385000 3.0550000 2.0200000 5.3600000 2.7750000
1 31 8.844444 0.4444444 0.0888889 5.9444444 3.2111111
1 51 10.675000 2.6400000 1.0500000 5.1500000 1.9550000
1 52 5.060000 0.8050000 0.8750000 4.2850000 2.6450000
1 63 6.775000 0.0250000 0.0000000 6.0550000 3.8550000
1 78 3.620000 0.7350000 0.5400000 1.5050000 3.4900000
1 79 8.061111 0.2823529 0.3444444 0.5666667 0.0000000
1 86 4.183333 1.7722222 0.8055556 5.4944444 4.1055556
2 3 6.738889 0.5888889 0.1055556 3.1388889 2.4777778
2 10 9.995000 6.9800000 0.4750000 2.1500000 0.8200000
2 15 4.405000 1.3150000 0.3400000 2.2850000 2.0600000
2 16 6.450000 3.3736842 1.0550000 3.4000000 0.4550000
2 19 8.640000 2.4500000 1.1350000 5.4050000 4.1550000
2 31 8.033333 0.6166667 0.1555556 6.0500000 5.0611111
2 51 9.985000 3.7950000 1.5700000 4.6700000 2.2550000
2 52 5.515000 1.0250000 1.1800000 4.2250000 2.1950000
2 63 8.415000 0.1050000 0.0100000 5.0900000 4.3550000
2 78 3.780000 0.2950000 0.7550000 1.5500000 2.7250000
2 79 7.938889 0.6941176 0.2555556 1.0333333 0.0000000
2 86 3.994444 2.0611111 0.7833333 4.5222222 2.8444444
3 3 5.294444 0.7666667 0.0944444 2.8555556 2.8666667
3 10 10.030000 6.4500000 0.1450000 3.1100000 0.6900000
3 15 3.963158 0.9894737 0.4421053 2.5473684 2.3684211
3 16 6.860000 2.7000000 1.1250000 3.2000000 0.5550000
3 19 8.740000 1.7250000 2.0700000 7.2400000 3.9050000
3 31 9.027778 0.6500000 0.1722222 6.5777778 5.1277778
3 51 10.220000 3.1300000 1.3500000 4.9150000 2.5450000
3 52 5.475000 0.8650000 0.7650000 3.1600000 2.6600000
3 63 8.060000 0.0650000 0.1250000 6.1850000 3.1000000
3 78 4.000000 0.7050000 0.6650000 1.1850000 3.5200000
3 79 7.733333 0.5722222 0.1166667 1.1777778 0.0277778
3 86 3.866667 1.6333333 0.9444444 4.1055556 3.0277778
(all) (all) 6.952518 1.8236994 0.6641727 3.8522302 2.5217579

treatment + subject ~ flavour with tidyr and dplyr

ff_g <- french_fries %>%
    gather(flavour, value, -c(time:rep), na.rm=TRUE)
ff_g %>% 
    group_by(treatment, subject, flavour) %>%
    summarise(value = mean(value)) %>%
    spread(flavour, value) %>%
    kable()
treatment subject potato buttery grassy rancid painty
1 3 6.216667 0.3722222 0.1888889 2.1055556 3.1111111
1 10 9.955000 6.7500000 0.5850000 4.0200000 1.3750000
1 15 3.360000 0.7200000 0.4200000 3.9650000 3.2600000
1 16 6.495000 3.2600000 0.7550000 4.1200000 1.2300000
1 19 9.385000 3.0550000 2.0200000 5.3600000 2.7750000
1 31 8.844444 0.4444444 0.0888889 5.9444444 3.2111111
1 51 10.675000 2.6400000 1.0500000 5.1500000 1.9550000
1 52 5.060000 0.8050000 0.8750000 4.2850000 2.6450000
1 63 6.775000 0.0250000 0.0000000 6.0550000 3.8550000
1 78 3.620000 0.7350000 0.5400000 1.5050000 3.4900000
1 79 8.061111 0.2823529 0.3444444 0.5666667 0.0000000
1 86 4.183333 1.7722222 0.8055556 5.4944444 4.1055556
2 3 6.738889 0.5888889 0.1055556 3.1388889 2.4777778
2 10 9.995000 6.9800000 0.4750000 2.1500000 0.8200000
2 15 4.405000 1.3150000 0.3400000 2.2850000 2.0600000
2 16 6.450000 3.3736842 1.0550000 3.4000000 0.4550000
2 19 8.640000 2.4500000 1.1350000 5.4050000 4.1550000
2 31 8.033333 0.6166667 0.1555556 6.0500000 5.0611111
2 51 9.985000 3.7950000 1.5700000 4.6700000 2.2550000
2 52 5.515000 1.0250000 1.1800000 4.2250000 2.1950000
2 63 8.415000 0.1050000 0.0100000 5.0900000 4.3550000
2 78 3.780000 0.2950000 0.7550000 1.5500000 2.7250000
2 79 7.938889 0.6941176 0.2555556 1.0333333 0.0000000
2 86 3.994444 2.0611111 0.7833333 4.5222222 2.8444444
3 3 5.294444 0.7666667 0.0944444 2.8555556 2.8666667
3 10 10.030000 6.4500000 0.1450000 3.1100000 0.6900000
3 15 3.963158 0.9894737 0.4421053 2.5473684 2.3684211
3 16 6.860000 2.7000000 1.1250000 3.2000000 0.5550000
3 19 8.740000 1.7250000 2.0700000 7.2400000 3.9050000
3 31 9.027778 0.6500000 0.1722222 6.5777778 5.1277778
3 51 10.220000 3.1300000 1.3500000 4.9150000 2.5450000
3 52 5.475000 0.8650000 0.7650000 3.1600000 2.6600000
3 63 8.060000 0.0650000 0.1250000 6.1850000 3.1000000
3 78 4.000000 0.7050000 0.6650000 1.1850000 3.5200000
3 79 7.733333 0.5722222 0.1166667 1.1777778 0.0277778
3 86 3.866667 1.6333333 0.9444444 4.1055556 3.0277778

Using gather(), group_by(), summarise(), spread() instead of group_by() summarise_each()

iris %>%
    group_by(Species) %>%
    summarise_each(funs(mean))
## Source: local data frame [3 x 5]
## 
##      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     setosa        5.006       3.428        1.462       0.246
## 2 versicolor        5.936       2.770        4.260       1.326
## 3  virginica        6.588       2.974        5.552       2.026
# 
iris %>% 
    gather(variable, value,-Species) %>%
    group_by(Species, variable) %>%
    summarise(value = mean(value)) %>%
    spread(variable, value)
## Source: local data frame [3 x 5]
## 
##      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     setosa        5.006       3.428        1.462       0.246
## 2 versicolor        5.936       2.770        4.260       1.326
## 3  virginica        6.588       2.974        5.552       2.026