## [1] 3
分组求均值
## [1] 3.428
## [1] 5.006
接下来看更简单的方法
## [1] 5.843333
# Whole-table summary of iris: row count, number of distinct Species values,
# and the Species value at the first, last and 51st position.
iris %>%
  summarise(
    n = n(),
    unique = n_distinct(Species),
    first = first(Species),
    last = last(Species),
    `51sst` = nth(Species, 51)
  )

# Grouped summary: standard deviation, sum and mean of Petal.Length,
# computed separately for each Species.
iris %>%
  group_by(Species) %>%
  summarise(
    sd.Petal.Length = sd(Petal.Length),
    sum.Petal.Length = sum(Petal.Length),
    mean.Petal.Length = mean(Petal.Length)
  )
默认情况下,以后的分组会覆盖现有分组,但是可以用add增加参数来添加到现有分组上下文中
# A later group_by() replaces the existing grouping by default, so here
# Sepal.LengthN is the row count per Sepal.Length value only.
iris %>%
  group_by(Species) %>%
  mutate(SpeciesN = n()) %>%
  group_by(Sepal.Length) %>%
  mutate(Sepal.LengthN = n()) %>%
  datatable()

# `.add = TRUE` keeps the Species grouping and adds Sepal.Length to it, so
# Sepal.LengthN is the row count per (Species, Sepal.Length) combination.
# `.add` replaces the `add` argument, which is deprecated since dplyr 1.0.0;
# `TRUE` is spelled out because `T` can be reassigned.
iris %>%
  group_by(Species) %>%
  mutate(SpeciesN = n()) %>%
  group_by(Sepal.Length, .add = TRUE) %>%
  mutate(Sepal.LengthN = n()) %>%
  datatable()
##删除分组ungroup 如果删除分组,上下文才能再次在整个表上工作,可用ungroup()函数
## tibble [150 x 6] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ Sepal.Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ SpeciesN : int [1:150] 50 50 50 50 50 50 50 50 50 50 ...
## - attr(*, "groups")= tibble [3 x 2] (S3: tbl_df/tbl/data.frame)
## ..$ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 2 3
## ..$ .rows :List of 3
## .. ..$ : int [1:50] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..$ : int [1:50] 51 52 53 54 55 56 57 58 59 60 ...
## .. ..$ : int [1:50] 101 102 103 104 105 106 107 108 109 110 ...
## ..- attr(*, ".drop")= logi TRUE
## tibble [150 x 6] (S3: tbl_df/tbl/data.frame)
## $ Sepal.Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ SpeciesN : int [1:150] 50 50 50 50 50 50 50 50 50 50 ...
# Two small example tables that share the key columns A and B.
# X carries the extra column C, Y carries the extra column D; only the
# rows ("a", "t") and ("b", "u") have matching keys in both tables.
X <- tibble(
  A = c("a", "b", "c"),
  B = c("t", "u", "v"),
  C = seq_len(3)
)

Y <- tibble(
  A = c("a", "b", "d"),
  B = c("t", "u", "w"),
  D = rev(seq_len(3))
)
## Joining, by = c("A", "B")
## Joining, by = c("A", "B")
如果两个表具有相同的结构(列名和列类型一致),union() 会在合并时删除重复的行,而 union_all() 不删除重复的行,只是将两个表完整拼接。