library(reshape2)
library(tidyr)
library(dplyr, warn.conflicts = FALSE)
library(knitr)
Air quality example
Raw data
names(airquality) <- tolower(names(airquality))
airquality %>% head() %>% kable(row.names = FALSE)
41 |
190 |
7.4 |
67 |
5 |
1 |
36 |
118 |
8.0 |
72 |
5 |
2 |
12 |
149 |
12.6 |
74 |
5 |
3 |
18 |
313 |
11.5 |
62 |
5 |
4 |
NA |
NA |
14.3 |
56 |
5 |
5 |
28 |
NA |
14.9 |
66 |
5 |
6 |
airquality %>% tail() %>% kable(row.names = FALSE)
14 |
20 |
16.6 |
63 |
9 |
25 |
30 |
193 |
6.9 |
70 |
9 |
26 |
NA |
145 |
13.2 |
77 |
9 |
27 |
14 |
191 |
14.3 |
75 |
9 |
28 |
18 |
131 |
8.0 |
76 |
9 |
29 |
20 |
223 |
11.5 |
68 |
9 |
30 |
Monthly mean with reshape2
aqm <- airquality %>%
melt(id=c("month", "day"), na.rm=TRUE)
aqm %>%
dcast(month ~ variable,
mean,
margins = c("month", "variable")) %>%
kable()
5 |
23.61538 |
181.2963 |
11.622581 |
65.54839 |
68.70696 |
6 |
29.44444 |
190.1667 |
10.266667 |
79.10000 |
87.38384 |
7 |
59.11538 |
216.4839 |
8.941935 |
83.90323 |
93.49748 |
8 |
59.96154 |
171.8571 |
8.793548 |
83.96774 |
79.71207 |
9 |
31.44828 |
167.4333 |
10.180000 |
76.90000 |
71.82689 |
(all) |
42.12931 |
185.9315 |
9.957516 |
77.88235 |
80.05722 |
Monthly mean with tidyr and dplyr
aqg <- airquality %>%
gather(variable, value, -month, -day, na.rm = TRUE)
aqg %>%
group_by(month, variable) %>%
summarise(value = mean(value)) %>%
spread(variable, value) %>%
kable()
5 |
23.61538 |
181.2963 |
11.622581 |
65.54839 |
6 |
29.44444 |
190.1667 |
10.266667 |
79.10000 |
7 |
59.11538 |
216.4839 |
8.941935 |
83.90323 |
8 |
59.96154 |
171.8571 |
8.793548 |
83.96774 |
9 |
31.44828 |
167.4333 |
10.180000 |
76.90000 |
French Fries example
Raw data
output of help(french_fries)
This data was collected from a sensory experiment conducted at Iowa State University in 2004. The investigators were interested in the effect of using three different fryer oils had on the taste of the fries.
Variables:
- time in weeks from start of study.
- treatment (type of oil),
- subject,
- replicate,
- potato-y flavour,
- buttery flavour,
- grassy flavour,
- rancid flavour,
- painty flavour
french_fries %>% head() %>% kable(row.names=FALSE)
1 |
1 |
3 |
1 |
2.9 |
0.0 |
0.0 |
0.0 |
5.5 |
1 |
1 |
3 |
2 |
14.0 |
0.0 |
0.0 |
1.1 |
0.0 |
1 |
1 |
10 |
1 |
11.0 |
6.4 |
0.0 |
0.0 |
0.0 |
1 |
1 |
10 |
2 |
9.9 |
5.9 |
2.9 |
2.2 |
0.0 |
1 |
1 |
15 |
1 |
1.2 |
0.1 |
0.0 |
1.1 |
5.1 |
1 |
1 |
15 |
2 |
8.8 |
3.0 |
3.6 |
1.5 |
2.3 |
french_fries %>% tail() %>% kable(row.names=FALSE)
10 |
3 |
63 |
1 |
7.0 |
0 |
0.0 |
11.9 |
3.3 |
10 |
3 |
63 |
2 |
7.0 |
0 |
0.0 |
11.4 |
7.3 |
10 |
3 |
78 |
1 |
3.7 |
0 |
0.9 |
0.0 |
1.3 |
10 |
3 |
78 |
2 |
3.3 |
0 |
0.0 |
2.5 |
1.4 |
10 |
3 |
86 |
1 |
2.5 |
0 |
0.0 |
7.0 |
10.5 |
10 |
3 |
86 |
2 |
2.5 |
0 |
0.0 |
8.2 |
9.4 |
treatment + subject ~ flavour with reshape2
ff_d <- french_fries %>%
melt(id=1:4, na.rm=TRUE)
ff_d %>%
dcast(treatment + subject ~ variable,
mean,
margins="treatment") %>%
kable()
1 |
3 |
6.216667 |
0.3722222 |
0.1888889 |
2.1055556 |
3.1111111 |
1 |
10 |
9.955000 |
6.7500000 |
0.5850000 |
4.0200000 |
1.3750000 |
1 |
15 |
3.360000 |
0.7200000 |
0.4200000 |
3.9650000 |
3.2600000 |
1 |
16 |
6.495000 |
3.2600000 |
0.7550000 |
4.1200000 |
1.2300000 |
1 |
19 |
9.385000 |
3.0550000 |
2.0200000 |
5.3600000 |
2.7750000 |
1 |
31 |
8.844444 |
0.4444444 |
0.0888889 |
5.9444444 |
3.2111111 |
1 |
51 |
10.675000 |
2.6400000 |
1.0500000 |
5.1500000 |
1.9550000 |
1 |
52 |
5.060000 |
0.8050000 |
0.8750000 |
4.2850000 |
2.6450000 |
1 |
63 |
6.775000 |
0.0250000 |
0.0000000 |
6.0550000 |
3.8550000 |
1 |
78 |
3.620000 |
0.7350000 |
0.5400000 |
1.5050000 |
3.4900000 |
1 |
79 |
8.061111 |
0.2823529 |
0.3444444 |
0.5666667 |
0.0000000 |
1 |
86 |
4.183333 |
1.7722222 |
0.8055556 |
5.4944444 |
4.1055556 |
2 |
3 |
6.738889 |
0.5888889 |
0.1055556 |
3.1388889 |
2.4777778 |
2 |
10 |
9.995000 |
6.9800000 |
0.4750000 |
2.1500000 |
0.8200000 |
2 |
15 |
4.405000 |
1.3150000 |
0.3400000 |
2.2850000 |
2.0600000 |
2 |
16 |
6.450000 |
3.3736842 |
1.0550000 |
3.4000000 |
0.4550000 |
2 |
19 |
8.640000 |
2.4500000 |
1.1350000 |
5.4050000 |
4.1550000 |
2 |
31 |
8.033333 |
0.6166667 |
0.1555556 |
6.0500000 |
5.0611111 |
2 |
51 |
9.985000 |
3.7950000 |
1.5700000 |
4.6700000 |
2.2550000 |
2 |
52 |
5.515000 |
1.0250000 |
1.1800000 |
4.2250000 |
2.1950000 |
2 |
63 |
8.415000 |
0.1050000 |
0.0100000 |
5.0900000 |
4.3550000 |
2 |
78 |
3.780000 |
0.2950000 |
0.7550000 |
1.5500000 |
2.7250000 |
2 |
79 |
7.938889 |
0.6941176 |
0.2555556 |
1.0333333 |
0.0000000 |
2 |
86 |
3.994444 |
2.0611111 |
0.7833333 |
4.5222222 |
2.8444444 |
3 |
3 |
5.294444 |
0.7666667 |
0.0944444 |
2.8555556 |
2.8666667 |
3 |
10 |
10.030000 |
6.4500000 |
0.1450000 |
3.1100000 |
0.6900000 |
3 |
15 |
3.963158 |
0.9894737 |
0.4421053 |
2.5473684 |
2.3684211 |
3 |
16 |
6.860000 |
2.7000000 |
1.1250000 |
3.2000000 |
0.5550000 |
3 |
19 |
8.740000 |
1.7250000 |
2.0700000 |
7.2400000 |
3.9050000 |
3 |
31 |
9.027778 |
0.6500000 |
0.1722222 |
6.5777778 |
5.1277778 |
3 |
51 |
10.220000 |
3.1300000 |
1.3500000 |
4.9150000 |
2.5450000 |
3 |
52 |
5.475000 |
0.8650000 |
0.7650000 |
3.1600000 |
2.6600000 |
3 |
63 |
8.060000 |
0.0650000 |
0.1250000 |
6.1850000 |
3.1000000 |
3 |
78 |
4.000000 |
0.7050000 |
0.6650000 |
1.1850000 |
3.5200000 |
3 |
79 |
7.733333 |
0.5722222 |
0.1166667 |
1.1777778 |
0.0277778 |
3 |
86 |
3.866667 |
1.6333333 |
0.9444444 |
4.1055556 |
3.0277778 |
(all) |
(all) |
6.952518 |
1.8236994 |
0.6641727 |
3.8522302 |
2.5217579 |
treatment + subject ~ flavour with tidyr and dplyr
ff_g <- french_fries %>%
gather(flavour, value, -c(time:rep), na.rm=TRUE)
ff_g %>%
group_by(treatment, subject, flavour) %>%
summarise(value = mean(value)) %>%
spread(flavour, value) %>%
kable()
1 |
3 |
6.216667 |
0.3722222 |
0.1888889 |
2.1055556 |
3.1111111 |
1 |
10 |
9.955000 |
6.7500000 |
0.5850000 |
4.0200000 |
1.3750000 |
1 |
15 |
3.360000 |
0.7200000 |
0.4200000 |
3.9650000 |
3.2600000 |
1 |
16 |
6.495000 |
3.2600000 |
0.7550000 |
4.1200000 |
1.2300000 |
1 |
19 |
9.385000 |
3.0550000 |
2.0200000 |
5.3600000 |
2.7750000 |
1 |
31 |
8.844444 |
0.4444444 |
0.0888889 |
5.9444444 |
3.2111111 |
1 |
51 |
10.675000 |
2.6400000 |
1.0500000 |
5.1500000 |
1.9550000 |
1 |
52 |
5.060000 |
0.8050000 |
0.8750000 |
4.2850000 |
2.6450000 |
1 |
63 |
6.775000 |
0.0250000 |
0.0000000 |
6.0550000 |
3.8550000 |
1 |
78 |
3.620000 |
0.7350000 |
0.5400000 |
1.5050000 |
3.4900000 |
1 |
79 |
8.061111 |
0.2823529 |
0.3444444 |
0.5666667 |
0.0000000 |
1 |
86 |
4.183333 |
1.7722222 |
0.8055556 |
5.4944444 |
4.1055556 |
2 |
3 |
6.738889 |
0.5888889 |
0.1055556 |
3.1388889 |
2.4777778 |
2 |
10 |
9.995000 |
6.9800000 |
0.4750000 |
2.1500000 |
0.8200000 |
2 |
15 |
4.405000 |
1.3150000 |
0.3400000 |
2.2850000 |
2.0600000 |
2 |
16 |
6.450000 |
3.3736842 |
1.0550000 |
3.4000000 |
0.4550000 |
2 |
19 |
8.640000 |
2.4500000 |
1.1350000 |
5.4050000 |
4.1550000 |
2 |
31 |
8.033333 |
0.6166667 |
0.1555556 |
6.0500000 |
5.0611111 |
2 |
51 |
9.985000 |
3.7950000 |
1.5700000 |
4.6700000 |
2.2550000 |
2 |
52 |
5.515000 |
1.0250000 |
1.1800000 |
4.2250000 |
2.1950000 |
2 |
63 |
8.415000 |
0.1050000 |
0.0100000 |
5.0900000 |
4.3550000 |
2 |
78 |
3.780000 |
0.2950000 |
0.7550000 |
1.5500000 |
2.7250000 |
2 |
79 |
7.938889 |
0.6941176 |
0.2555556 |
1.0333333 |
0.0000000 |
2 |
86 |
3.994444 |
2.0611111 |
0.7833333 |
4.5222222 |
2.8444444 |
3 |
3 |
5.294444 |
0.7666667 |
0.0944444 |
2.8555556 |
2.8666667 |
3 |
10 |
10.030000 |
6.4500000 |
0.1450000 |
3.1100000 |
0.6900000 |
3 |
15 |
3.963158 |
0.9894737 |
0.4421053 |
2.5473684 |
2.3684211 |
3 |
16 |
6.860000 |
2.7000000 |
1.1250000 |
3.2000000 |
0.5550000 |
3 |
19 |
8.740000 |
1.7250000 |
2.0700000 |
7.2400000 |
3.9050000 |
3 |
31 |
9.027778 |
0.6500000 |
0.1722222 |
6.5777778 |
5.1277778 |
3 |
51 |
10.220000 |
3.1300000 |
1.3500000 |
4.9150000 |
2.5450000 |
3 |
52 |
5.475000 |
0.8650000 |
0.7650000 |
3.1600000 |
2.6600000 |
3 |
63 |
8.060000 |
0.0650000 |
0.1250000 |
6.1850000 |
3.1000000 |
3 |
78 |
4.000000 |
0.7050000 |
0.6650000 |
1.1850000 |
3.5200000 |
3 |
79 |
7.733333 |
0.5722222 |
0.1166667 |
1.1777778 |
0.0277778 |
3 |
86 |
3.866667 |
1.6333333 |
0.9444444 |
4.1055556 |
3.0277778 |
Using gather(), group_by(), summarise(), spread() instead of group_by() summarise_each()
iris %>%
group_by(Species) %>%
summarise_each(funs(mean))
## Source: local data frame [3 x 5]
##
## Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 setosa 5.006 3.428 1.462 0.246
## 2 versicolor 5.936 2.770 4.260 1.326
## 3 virginica 6.588 2.974 5.552 2.026
#
iris %>%
gather(variable, value,-Species) %>%
group_by(Species, variable) %>%
summarise(value = mean(value)) %>%
spread(variable, value)
## Source: local data frame [3 x 5]
##
## Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 setosa 5.006 3.428 1.462 0.246
## 2 versicolor 5.936 2.770 4.260 1.326
## 3 virginica 6.588 2.974 5.552 2.026