> # Toy sampling frame: 20 units (ID 1-20, NAME A-T) split into three
> # strata of unequal size: S1 = 5 rows, S2 = 8 rows, S3 = 7 rows.
> df <- data.frame(
+   ID = seq_len(20),
+   NAME = head(LETTERS, 20),
+   STRATA = rep(c("S1", "S2", "S3"), times = c(5, 8, 7))
+ )
> df
ID NAME STRATA
1 1 A S1
2 2 B S1
3 3 C S1
4 4 D S1
5 5 E S1
6 6 F S2
7 7 G S2
8 8 H S2
9 9 I S2
10 10 J S2
11 11 K S2
12 12 L S2
13 13 M S2
14 14 N S3
15 15 O S3
16 16 P S3
17 17 Q S3
18 18 R S3
19 19 S S3
20 20 T S3
> library(dplyr)
> set.seed(2024) # fix the RNG seed so the rows drawn below are reproducible
> # Stratified random sample: group the rows by STRATA, then draw 3 rows
> # from each group. The printed result is still grouped by STRATA.
> # NOTE(review): dplyr documents sample_n() as superseded by
> # slice_sample(n = ). Not swapped here because the printed draw depends on
> # this exact call; confirm the draws match before changing it.
> df %>%
+ group_by(STRATA) %>%
+ sample_n(3) # 3 rows per stratum, 9 rows in total
# A tibble: 9 × 3
# Groups: STRATA [3]
ID NAME STRATA
<int> <chr> <chr>
1 2 B S1
2 1 A S1
3 4 D S1
4 9 I S2
5 12 L S2
6 6 F S2
7 18 R S3
8 14 N S3
9 15 O S3
Sample size 128, 2 strata (age < 1 month and age > 1 month)
> set.seed(2024)
> # Practice frame: 128 units with a whole-number ID (a standard-normal draw
> # scaled by 100 and rounded) and a randomly assigned AGE_GROUP stratum.
> # `|>` binds tighter than `*`, so `rnorm(128)*100 |> round()` rounds only
> # the constant 100 and leaves the IDs unrounded. Wrapping the whole
> # product in round() applies the rounding to the IDs themselves.
> # NOTE: output printed from df_prac before this fix still shows unrounded
> # IDs and should be regenerated by re-running the commands.
> df_prac <- data.frame(
+   ID = round(rnorm(128) * 100),
+   AGE_GROUP = sample(c("Age < 1", "Age > 1"), size = 128, replace = TRUE)
+ )
> head(df_prac)
ID AGE_GROUP
1 98 Age < 1
2 47 Age < 1
3 -11 Age < 1
4 -21 Age > 1
5 116 Age > 1
6 129 Age < 1
> # Stratified sample from df_prac: one group per AGE_GROUP value, then
> # 10 rows drawn from each group (20 rows in total, still grouped).
> # No set.seed() call immediately precedes this draw, so the rows selected
> # depend on the RNG state left by the earlier commands.
> df_prac %>%
+ group_by(AGE_GROUP) %>% # AGE_GROUP contains two strata
+ sample_n(10) # Taking 10 samples from each stratum
# A tibble: 20 × 2
# Groups: AGE_GROUP [2]
ID AGE_GROUP
<dbl> <chr>
1 -124. Age < 1
2 -76.6 Age < 1
3 111. Age < 1
4 37.4 Age < 1
5 -49.9 Age < 1
6 -130. Age < 1
7 68.5 Age < 1
8 -127. Age < 1
9 -21.3 Age < 1
10 -112. Age < 1
11 -2.89 Age > 1
12 -18.3 Age > 1
13 -32.9 Age > 1
14 -120. Age > 1
15 -117. Age > 1
16 -327. Age > 1
17 -12.9 Age > 1
18 197. Age > 1
19 49.2 Age > 1
20 -26.5 Age > 1