Features of Tidy Data:
Each variable forms a column
Each observation forms a row
Each table holds data about one type of observation
library(reshape2)
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
Refer to ?melt for details.
# Work on a copy and store the car names (held only as row names in mtcars)
# in a regular column, so they can serve as an id variable when melting.
mtcars.copy <- mtcars
mtcars.copy[["carname"]] <- row.names(mtcars)
head(mtcars.copy)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
## carname
## Mazda RX4 Mazda RX4
## Mazda RX4 Wag Mazda RX4 Wag
## Datsun 710 Datsun 710
## Hornet 4 Drive Hornet 4 Drive
## Hornet Sportabout Hornet Sportabout
## Valiant Valiant
# Reshape to long format: carname, gear and cyl identify each row, while the
# mpg and hp columns are stacked into a variable/value pair.
carmelt <- melt(
  data = mtcars.copy,
  id.vars = c("carname", "gear", "cyl"),
  measure.vars = c("mpg", "hp")
)
carmelt
## carname gear cyl variable value
## 1 Mazda RX4 4 6 mpg 21.0
## 2 Mazda RX4 Wag 4 6 mpg 21.0
## 3 Datsun 710 4 4 mpg 22.8
## 4 Hornet 4 Drive 3 6 mpg 21.4
## 5 Hornet Sportabout 3 8 mpg 18.7
## 6 Valiant 3 6 mpg 18.1
## 7 Duster 360 3 8 mpg 14.3
## 8 Merc 240D 4 4 mpg 24.4
## 9 Merc 230 4 4 mpg 22.8
## 10 Merc 280 4 6 mpg 19.2
## 11 Merc 280C 4 6 mpg 17.8
## 12 Merc 450SE 3 8 mpg 16.4
## 13 Merc 450SL 3 8 mpg 17.3
## 14 Merc 450SLC 3 8 mpg 15.2
## 15 Cadillac Fleetwood 3 8 mpg 10.4
## 16 Lincoln Continental 3 8 mpg 10.4
## 17 Chrysler Imperial 3 8 mpg 14.7
## 18 Fiat 128 4 4 mpg 32.4
## 19 Honda Civic 4 4 mpg 30.4
## 20 Toyota Corolla 4 4 mpg 33.9
## 21 Toyota Corona 3 4 mpg 21.5
## 22 Dodge Challenger 3 8 mpg 15.5
## 23 AMC Javelin 3 8 mpg 15.2
## 24 Camaro Z28 3 8 mpg 13.3
## 25 Pontiac Firebird 3 8 mpg 19.2
## 26 Fiat X1-9 4 4 mpg 27.3
## 27 Porsche 914-2 5 4 mpg 26.0
## 28 Lotus Europa 5 4 mpg 30.4
## 29 Ford Pantera L 5 8 mpg 15.8
## 30 Ferrari Dino 5 6 mpg 19.7
## 31 Maserati Bora 5 8 mpg 15.0
## 32 Volvo 142E 4 4 mpg 21.4
## 33 Mazda RX4 4 6 hp 110.0
## 34 Mazda RX4 Wag 4 6 hp 110.0
## 35 Datsun 710 4 4 hp 93.0
## 36 Hornet 4 Drive 3 6 hp 110.0
## 37 Hornet Sportabout 3 8 hp 175.0
## 38 Valiant 3 6 hp 105.0
## 39 Duster 360 3 8 hp 245.0
## 40 Merc 240D 4 4 hp 62.0
## 41 Merc 230 4 4 hp 95.0
## 42 Merc 280 4 6 hp 123.0
## 43 Merc 280C 4 6 hp 123.0
## 44 Merc 450SE 3 8 hp 180.0
## 45 Merc 450SL 3 8 hp 180.0
## 46 Merc 450SLC 3 8 hp 180.0
## 47 Cadillac Fleetwood 3 8 hp 205.0
## 48 Lincoln Continental 3 8 hp 215.0
## 49 Chrysler Imperial 3 8 hp 230.0
## 50 Fiat 128 4 4 hp 66.0
## 51 Honda Civic 4 4 hp 52.0
## 52 Toyota Corolla 4 4 hp 65.0
## 53 Toyota Corona 3 4 hp 97.0
## 54 Dodge Challenger 3 8 hp 150.0
## 55 AMC Javelin 3 8 hp 150.0
## 56 Camaro Z28 3 8 hp 245.0
## 57 Pontiac Firebird 3 8 hp 175.0
## 58 Fiat X1-9 4 4 hp 66.0
## 59 Porsche 914-2 5 4 hp 91.0
## 60 Lotus Europa 5 4 hp 113.0
## 61 Ford Pantera L 5 8 hp 264.0
## 62 Ferrari Dino 5 6 hp 175.0
## 63 Maserati Bora 5 8 hp 335.0
## 64 Volvo 142E 4 4 hp 109.0
nrow(carmelt)
## [1] 64
Refer to ?dcast for details.
# No fun.aggregate is supplied here, so dcast falls back to length: each cell
# holds the number of rows for that cyl/variable combination, not a summary of
# the measured values.
cylData <- dcast(data = carmelt, formula = cyl ~ variable)
## Aggregation function missing: defaulting to length
cylData
## cyl mpg hp
## 1 4 11 11
## 2 6 7 7
## 3 8 14 14
# Supplying fun.aggregate = mean gives the average mpg and hp per cyl value.
cylData <- dcast(
  data = carmelt,
  formula = cyl ~ variable,
  fun.aggregate = mean
)
cylData
## cyl mpg hp
## 1 4 26.66364 82.63636
## 2 6 19.74286 122.28571
## 3 8 15.10000 209.21429
Refer to ?tapply, ?split, ?sapply, ?lapply for details.
head(InsectSprays)
## count spray
## 1 10 A
## 2 7 A
## 3 20 A
## 4 14 A
## 5 14 A
## 6 12 A
# Total insect count for each spray type.
with(InsectSprays, tapply(X = count, INDEX = spray, FUN = sum))
## A B C D E F
## 174 184 25 59 42 200
# Alternative approach: first split the counts into one vector per spray,
# then apply the summary function to each piece.
sp <- with(InsectSprays, split(x = count, f = spray))
sp
## $A
## [1] 10 7 20 14 14 12 10 23 17 20 14 13
##
## $B
## [1] 11 17 21 11 16 14 17 17 19 21 7 13
##
## $C
## [1] 0 1 7 2 3 1 2 1 3 0 1 4
##
## $D
## [1] 3 5 12 6 4 3 5 5 5 5 2 4
##
## $E
## [1] 3 5 3 5 3 6 1 1 3 2 6 4
##
## $F
## [1] 11 9 15 22 15 16 13 10 26 26 24 13
# Sum each spray's counts; sapply simplifies the results to a named vector.
sapply(sp, sum)
## A B C D E F
## 174 184 25 59 42 200