# cut ----
# Smallest and largest observed body_mass_g, ignoring missing values.
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
range(pg$body_mass_g, na.rm = TRUE)
## [1] 2700 6300
# Bin body_mass_g into 500-wide intervals covering 2500-6500 and count the
# rows per bin. dig.lab = 4 keeps the four-digit break labels in plain
# notation, e.g. (2500,3000].
pg %>%
  mutate(
    body_bin = cut(
      body_mass_g,
      breaks = seq(2500, 6500, by = 500),
      dig.lab = 4
    )
  ) %>%
  count(body_bin) # shorthand for group_by(body_bin) %>% summarise(n = n())
## # A tibble: 9 x 2
## body_bin n
## <fct> <int>
## 1 (2500,3000] 11
## 2 (3000,3500] 67
## 3 (3500,4000] 92
## 4 (4000,4500] 57
## 5 (4500,5000] 54
## 6 (5000,5500] 33
## 7 (5500,6000] 26
## 8 (6000,6500] 2
## 9 <NA> 2
# dlookr::binning ----
library(dlookr)
# Called with only the data vector, binning() reports "quantile" bins and
# picks 10 of them here, so every interval holds a similar number of
# observations (31-37 below). The two missing values are tallied under <NA>.
binning(pg$body_mass_g)
## binned type: quantile
## number of bins: 10
## x
## [2700,3289.167] (3289.167,3470] (3470,3650] (3650,3800] (3800,4050]
## 34 34 36 37 35
## (4050,4300] (4300,4650] (4650,4955] (4955,5421.667] (5421.667,6300]
## 31 35 32 34 34
## <NA>
## 2
# type = "equal": bins of equal width (360 each below, spanning the observed
# 2700-6300 range), so the counts per bin vary instead of the widths.
binning(pg$body_mass_g, type = "equal")
## binned type: equal
## number of bins: 10
## x
## [2700,3060] (3060,3420] (3420,3780] (3780,4140] (4140,4500] (4500,4860]
## 15 43 71 53 45 38
## (4860,5220] (5220,5580] (5580,5940] (5940,6300] <NA>
## 28 27 16 6 2
# type = "pretty": breaks land on round numbers (2500, 3000, ..., 6500),
# giving 8 bins here; presumably chosen via base::pretty() -- confirm in the
# dlookr documentation.
binning(pg$body_mass_g, type = "pretty")
## binned type: pretty
## number of bins: 8
## x
## [2500,3000] (3000,3500] (3500,4000] (4000,4500] (4500,5000] (5000,5500]
## 26 2 2
# Frequency of each "pretty" body-mass bin within each species.
# - body_mass_g is referenced as a bare column so mutate() uses the data
#   flowing through the pipe rather than reaching back into the global `pg`.
# - extract() is namespace-qualified because tidyr and magrittr also export
#   an extract(); dlookr's version turns the `bins` object into an ordered
#   factor (the <ord> column below).
# summarise() peels off only the last grouping level, so the result is still
# grouped by species (see the "# Groups" line in the output).
pg %>%
  mutate(
    body_bin = binning(body_mass_g, type = "pretty") %>%
      dlookr::extract()
  ) %>%
  group_by(species, body_bin) %>%
  summarise(freq = n())
## # A tibble: 18 x 3
## # Groups: species [3]
## species body_bin freq
## <fct> <ord> <int>
## 1 Adelie [2500,3000] 9
## 2 Adelie (3000,3500] 50
## 3 Adelie (3500,4000] 57
## 4 Adelie (4000,4500] 28
## 5 Adelie (4500,5000] 7
## 6 Adelie <NA> 1
## 7 Chinstrap [2500,3000] 2
## 8 Chinstrap (3000,3500] 17
## 9 Chinstrap (3500,4000] 34
## 10 Chinstrap (4000,4500] 13
## 11 Chinstrap (4500,5000] 2
## 12 Gentoo (3500,4000] 1
## 13 Gentoo (4000,4500] 16
## 14 Gentoo (4500,5000] 45
## 15 Gentoo (5000,5500] 33
## 16 Gentoo (5500,6000] 26
## 17 Gentoo (6000,6500] 2
## 18 Gentoo <NA> 1
# Same per-species frequency table, but with five "pretty" bins carrying
# custom labels ("2K" ... "6K") instead of interval notation.
# - The argument is spelled `nbins` in full; `nbin` only worked through R's
#   partial argument matching.
# - body_mass_g is referenced as a bare column so mutate() uses the piped
#   data rather than the global `pg`.
# The binning() result is stored as-is (no extract()), hence the <bins>
# column type in the output.
pg %>%
  mutate(
    body_bin = binning(
      body_mass_g,
      type = "pretty",
      nbins = 5,
      labels = c("2K", "3K", "4K", "5K", "6K")
    )
  ) %>%
  group_by(species, body_bin) %>%
  summarise(freq = n())
## # A tibble: 12 x 3
## # Groups: species [3]
## species body_bin freq
## <fct> <bins> <int>
## 1 Adelie 2K 9
## 2 Adelie 3K 107
## 3 Adelie 4K 35
## 4 Adelie <NA> 1
## 5 Chinstrap 2K 2
## 6 Chinstrap 3K 51
## 7 Chinstrap 4K 15
## 8 Gentoo 3K 1
## 9 Gentoo 4K 61
## 10 Gentoo 5K 59
## 11 Gentoo 6K 2
## 12 Gentoo <NA> 1
# Quartile membership counts for body_mass_g.
# quantile() returns five break points (min, Q1, median, Q3, max). cut()
# builds right-closed intervals by default, so the first one is (min, Q1] and
# the minimum observation itself would become NA and vanish from the table.
# include.lowest = TRUE closes that first interval on the left so every
# non-missing value is assigned to a quartile.
table(
  cut(
    pg$body_mass_g,
    breaks = quantile(pg$body_mass_g, na.rm = TRUE),
    labels = c("Q1", "Q2", "Q3", "Q4"),
    include.lowest = TRUE
  )
)
##
## Q1 Q2 Q3 Q4
## 89 87 81 85