## [1] "Hello Minkook"
## [1] "Hello Bob"
## [1] "Hello Sarah"
## carat cut color clarity depth table price x y z
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## cut price
## 1 Fair 4358.758
## 2 Good 3928.864
## 3 Very Good 3981.760
## 4 Premium 4584.258
## 5 Ideal 3457.542
## cut color price
## 1 Fair D 4291.061
## 2 Good D 3405.382
## 3 Very Good D 3470.467
## 4 Premium D 3631.293
## 5 Ideal D 2629.095
## 6 Fair E 3682.312
## 7 Good E 3423.644
## 8 Very Good E 3214.652
## 9 Premium E 3538.914
## 10 Ideal E 2597.550
## 11 Fair F 3827.003
## 12 Good F 3495.750
## 13 Very Good F 3778.820
## 14 Premium F 4324.890
## 15 Ideal F 3374.939
## 16 Fair G 4239.255
## 17 Good G 4123.482
## 18 Very Good G 3872.754
## 19 Premium G 4500.742
## 20 Ideal G 3720.706
## 21 Fair H 5135.683
## 22 Good H 4276.255
## 23 Very Good H 4535.390
## 24 Premium H 5216.707
## 25 Ideal H 3889.335
## 26 Fair I 4685.446
## 27 Good I 5078.533
## 28 Very Good I 5255.880
## 29 Premium I 5946.181
## 30 Ideal I 4451.970
## 31 Fair J 4975.655
## 32 Good J 4574.173
## 33 Very Good J 5103.513
## 34 Premium J 6294.592
## 35 Ideal J 4918.186
## cut price carat
## 1 Fair 4358.758 1.0461366
## 2 Good 3928.864 0.8491847
## 3 Very Good 3981.760 0.8063814
## 4 Premium 4584.258 0.8919549
## 5 Ideal 3457.542 0.7028370
그러나 이 함수의 한계는 한 번에 하나의 함수만 적용할 수 있다는 점입니다. 둘 이상의 함수를 적용하려면 plyr 또는 dplyr 패키지를 사용하는 것이 편리합니다.
plyr 패키지도 좋지만, 다 배울 수는 없으니 파이프 연산자로 유명한 dplyr 패키지를 배워 봅시다.
이 패키지는 주로 데이터프레임에 집중합니다. 점차 plyr 패키지를 대체해 데이터 작업의 사실상 표준 방법이 되고 있습니다. 열을 선택하는 select, 행을 필터링하는 filter, 데이터를 그룹화하는 group_by, 기존의 열에 기반해 새로운 열을 추가하는 mutate 함수들이 존재합니다.
dplyr 패키지는 놀랍도록 빠를 뿐만 아니라, magrittr 패키지를 통해 구현되는 새로운 파이핑 패러다임을 대중화하는 데도 이바지했습니다. 파이핑을 사용하면 %>% 연산자를 사용해 앞 함수의 결과를 이어지는 함수로 보낼 수 있습니다.
## [1] 4 10
## [1] 4 10
## carat cut color clarity depth table price x y z
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## # A tibble: 53,940 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.290 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## 7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
## 8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49
## 10 0.23 Very Good H VS1 59.4 61 338 4 4.05 2.39
## # ... with 53,930 more rows
## # A tibble: 53,940 x 2
## carat price
## <dbl> <int>
## 1 0.23 326
## 2 0.21 326
## 3 0.23 327
## 4 0.290 334
## 5 0.31 335
## 6 0.24 336
## 7 0.24 336
## 8 0.26 337
## 9 0.22 337
## 10 0.23 338
## # ... with 53,930 more rows
## # A tibble: 53,940 x 2
## carat price
## <dbl> <int>
## 1 0.23 326
## 2 0.21 326
## 3 0.23 327
## 4 0.290 334
## 5 0.31 335
## 6 0.24 336
## 7 0.24 336
## 8 0.26 337
## 9 0.22 337
## 10 0.23 338
## # ... with 53,930 more rows
## # A tibble: 53,940 x 2
## .dots1 .dots2
## <dbl> <int>
## 1 0.23 326
## 2 0.21 326
## 3 0.23 327
## 4 0.290 334
## 5 0.31 335
## 6 0.24 336
## 7 0.24 336
## 8 0.26 337
## 9 0.22 337
## 10 0.23 338
## # ... with 53,930 more rows
## # A tibble: 21,551 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.23 Ideal J VS1 62.8 56 340 3.93 3.9 2.46
## 3 0.31 Ideal J SI2 62.2 54 344 4.35 4.37 2.71
## 4 0.3 Ideal I SI2 62 54 348 4.31 4.34 2.68
## 5 0.33 Ideal I SI2 61.8 55 403 4.49 4.51 2.78
## 6 0.33 Ideal I SI2 61.2 56 403 4.49 4.5 2.75
## 7 0.33 Ideal J SI1 61.1 56 403 4.49 4.55 2.76
## 8 0.23 Ideal G VS1 61.9 54 404 3.93 3.95 2.44
## 9 0.32 Ideal I SI1 60.9 55 404 4.45 4.48 2.72
## 10 0.3 Ideal I SI2 61 59 405 4.3 4.33 2.63
## # ... with 21,541 more rows
## # A tibble: 26,457 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 3 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 4 0.3 Good J SI1 64 55 339 4.25 4.28 2.73
## 5 0.23 Ideal J VS1 62.8 56 340 3.93 3.9 2.46
## 6 0.31 Ideal J SI2 62.2 54 344 4.35 4.37 2.71
## 7 0.3 Ideal I SI2 62 54 348 4.31 4.34 2.68
## 8 0.3 Good J SI1 63.4 54 351 4.23 4.29 2.7
## 9 0.3 Good J SI1 63.8 56 351 4.23 4.26 2.71
## 10 0.3 Good I SI2 63.3 56 351 4.26 4.3 2.71
## # ... with 26,447 more rows
## ~cut == "Ideal"
## # A tibble: 21,551 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.23 Ideal J VS1 62.8 56 340 3.93 3.9 2.46
## 3 0.31 Ideal J SI2 62.2 54 344 4.35 4.37 2.71
## 4 0.3 Ideal I SI2 62 54 348 4.31 4.34 2.68
## 5 0.33 Ideal I SI2 61.8 55 403 4.49 4.51 2.78
## 6 0.33 Ideal I SI2 61.2 56 403 4.49 4.5 2.75
## 7 0.33 Ideal J SI1 61.1 56 403 4.49 4.55 2.76
## 8 0.23 Ideal G VS1 61.9 54 404 3.93 3.95 2.44
## 9 0.32 Ideal I SI1 60.9 55 404 4.45 4.48 2.72
## 10 0.3 Ideal I SI2 61 59 405 4.3 4.33 2.63
## # ... with 21,541 more rows
6. mutate
## # A tibble: 53,940 x 11
## carat cut color clarity depth table price x y z `price/carat`
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 1417.
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 1552.
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 1422.
## 4 0.290 Premium I VS2 62.4 58 334 4.2 4.23 2.63 1152.
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 1081.
## 6 0.24 Very G~ J VVS2 62.8 57 336 3.94 3.96 2.48 1400
## 7 0.24 Very G~ I VVS1 62.3 57 336 3.95 3.98 2.47 1400
## 8 0.26 Very G~ H SI1 61.9 55 337 4.07 4.11 2.53 1296.
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49 1532.
## 10 0.23 Very G~ H VS1 59.4 61 338 4 4.05 2.39 1470.
## # ... with 53,930 more rows
## # A tibble: 53,940 x 4
## carat price Ratio Double
## <dbl> <int> <dbl> <dbl>
## 1 0.23 326 1417. 2835.
## 2 0.21 326 1552. 3105.
## 3 0.23 327 1422. 2843.
## 4 0.290 334 1152. 2303.
## 5 0.31 335 1081. 2161.
## 6 0.24 336 1400 2800
## 7 0.24 336 1400 2800
## 8 0.26 337 1296. 2592.
## 9 0.22 337 1532. 3064.
## 10 0.23 338 1470. 2939.
## # ... with 53,930 more rows
## # A tibble: 5 x 2
## cut AvgPrice
## <ord> <dbl>
## 1 Fair 4359.
## 2 Good 3929.
## 3 Very Good 3982.
## 4 Premium 4584.
## 5 Ideal 3458.
# Return the first N rows of `x` after sorting by descending `price`.
#
# x: a data frame that has a `price` column.
# N: number of rows to keep from the top of the sorted result (default 5).
topN <- function(x, N=5){
  by_price_desc <- arrange(x, desc(price))
  head(by_price_desc, N)
}
# Apply topN() to each `cut` group separately, keeping 3 rows per group.
diamonds %>%
  group_by(cut) %>%
  do(topN(., N = 3))
## # A tibble: 15 x 10
## # Groups: cut [5]
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 2.01 Fair G SI1 70.6 64 18574 7.43 6.64 4.69
## 2 2.02 Fair H VS2 64.5 57 18565 8 7.95 5.14
## 3 4.5 Fair J I1 65.8 58 18531 10.2 10.2 6.72
## 4 2.8 Good G SI2 63.8 58 18788 8.9 8.85 0
## 5 2.07 Good I VS2 61.8 61 18707 8.12 8.16 5.03
## 6 2.67 Good F SI2 63.8 58 18686 8.69 8.64 5.54
## 7 2 Very Good G SI1 63.5 56 18818 7.9 7.97 5.04
## 8 2 Very Good H SI1 62.8 57 18803 7.95 8 5.01
## 9 2.03 Very Good H SI1 63 60 18781 8 7.93 5.02
## 10 2.29 Premium I VS2 60.8 60 18823 8.5 8.47 5.16
## 11 2.29 Premium I SI1 61.8 59 18797 8.52 8.45 5.24
## 12 2.04 Premium H SI1 58.1 60 18795 8.37 8.28 4.84
## 13 1.51 Ideal G IF 61.7 55 18806 7.37 7.41 4.56
## 14 2.07 Ideal G SI2 62.5 55 18804 8.2 8.13 5.11
## 15 2.15 Ideal G SI2 62.6 54 18791 8.29 8.35 5.21