We use the standard formula for sample size estimation in proportion studies:
\[ n = \frac{z^2 \cdot p \cdot q}{d^2} \]
where:
- z = z-score corresponding to a 95% confidence level (1.96)
- p = estimated proportion (0.5 is used as a conservative estimate because it maximizes p · q)
- q = (1 - p)
- d = margin of error (0.05)
# Sample Size Calculation
# Implements n = z^2 * p * (1 - p) / d^2 for estimating a single proportion.
z <- 1.96  # z-value for a 95% confidence level
p <- 0.5   # assumed proportion (conservative choice)
d <- 0.05  # margin of error

# (1 - p) plays the role of q in the formula.
n <- (z^2 * p * (1 - p)) / d^2
print(n)
## [1] 384.16
The required sample size is approximately 384; rounding up to whole participants gives 385.
gtsummary
# install.packages("gtsummary") # Uncomment if not installed
library(gtsummary)
library(dplyr)
We use the built-in `trial` dataset from the gtsummary package:
# Preview the first rows of the example data.
trial %>%
  head()
## # A tibble: 6 × 8
## trt age marker stage grade response death ttdeath
## <chr> <dbl> <dbl> <fct> <fct> <int> <int> <dbl>
## 1 Drug A 23 0.16 T1 II 0 0 24
## 2 Drug B 9 1.11 T2 I 1 0 24
## 3 Drug A 31 0.277 T1 II 0 0 24
## 4 Drug A NA 2.07 T3 III 1 1 17.6
## 5 Drug A 51 2.77 T4 III 1 1 16.4
## 6 Drug B 39 0.613 T4 I 0 1 15.6
# Summarize every column of `trial` with the default statistics.
tbl_summary(trial)
Characteristic | N = 200¹ |
---|---|
Chemotherapy Treatment | |
Drug A | 98 (49%) |
Drug B | 102 (51%) |
Age | 47 (38, 57) |
Unknown | 11 |
Marker Level (ng/mL) | 0.64 (0.22, 1.41) |
Unknown | 10 |
T Stage | |
T1 | 53 (27%) |
T2 | 54 (27%) |
T3 | 43 (22%) |
T4 | 50 (25%) |
Grade | |
I | 68 (34%) |
II | 68 (34%) |
III | 64 (32%) |
Tumor Response | 61 (32%) |
Unknown | 7 |
Patient Died | 112 (56%) |
Months to Death/Censor | 22.4 (15.9, 24.0) |
¹ n (%); Median (Q1, Q3) |
# Keep only the treatment, age, and grade columns for a smaller example table.
trial2 <- select(trial, trt, age, grade)

# Summarize the reduced data set with the default statistics.
tbl_summary(trial2)
Characteristic | N = 200¹ |
---|---|
Chemotherapy Treatment | |
Drug A | 98 (49%) |
Drug B | 102 (51%) |
Age | 47 (38, 57) |
Unknown | 11 |
Grade | |
I | 68 (34%) |
II | 68 (34%) |
III | 64 (32%) |
¹ n (%); Median (Q1, Q3) |
# Split the summary into one column per treatment arm.
tbl_summary(trial, by = trt)
Characteristic | Drug A N = 98¹ | Drug B N = 102¹ |
---|---|---|
Age | 46 (37, 60) | 48 (39, 56) |
Unknown | 7 | 4 |
Marker Level (ng/mL) | 0.84 (0.23, 1.60) | 0.52 (0.18, 1.21) |
Unknown | 6 | 4 |
T Stage | ||
T1 | 28 (29%) | 25 (25%) |
T2 | 25 (26%) | 29 (28%) |
T3 | 22 (22%) | 21 (21%) |
T4 | 23 (23%) | 27 (26%) |
Grade | ||
I | 35 (36%) | 33 (32%) |
II | 32 (33%) | 36 (35%) |
III | 31 (32%) | 33 (32%) |
Tumor Response | 28 (29%) | 33 (34%) |
Unknown | 3 | 4 |
Patient Died | 52 (53%) | 60 (59%) |
Months to Death/Censor | 23.5 (17.4, 24.0) | 21.2 (14.5, 24.0) |
¹ Median (Q1, Q3); n (%) |
# Compare the treatment arms and append a p-value column.
tbl_summary(trial, by = trt) %>%
  add_p()
Characteristic | Drug A N = 98¹ | Drug B N = 102¹ | p-value² |
---|---|---|---|
Age | 46 (37, 60) | 48 (39, 56) | 0.7 |
Unknown | 7 | 4 | |
Marker Level (ng/mL) | 0.84 (0.23, 1.60) | 0.52 (0.18, 1.21) | 0.085 |
Unknown | 6 | 4 | |
T Stage | | | 0.9 |
T1 | 28 (29%) | 25 (25%) | |
T2 | 25 (26%) | 29 (28%) | |
T3 | 22 (22%) | 21 (21%) | |
T4 | 23 (23%) | 27 (26%) | |
Grade | | | 0.9 |
I | 35 (36%) | 33 (32%) | |
II | 32 (33%) | 36 (35%) | |
III | 31 (32%) | 33 (32%) | |
Tumor Response | 28 (29%) | 33 (34%) | 0.5 |
Unknown | 3 | 4 | |
Patient Died | 52 (53%) | 60 (59%) | 0.4 |
Months to Death/Censor | 23.5 (17.4, 24.0) | 21.2 (14.5, 24.0) | 0.14 |
¹ Median (Q1, Q3); n (%) | | | |
² Wilcoxon rank sum test; Pearson’s Chi-squared test | | | |
# Report mean (SD) for continuous variables and n / N (%) for categorical ones,
# instead of the default statistics.
tbl_summary(
  trial,
  by = trt,
  statistic = list(
    all_continuous() ~ "{mean} ({sd})",
    all_categorical() ~ "{n} / {N} ({p}%)"
  )
)
Characteristic | Drug A N = 98¹ | Drug B N = 102¹ |
---|---|---|
Age | 47 (15) | 47 (14) |
Unknown | 7 | 4 |
Marker Level (ng/mL) | 1.02 (0.89) | 0.82 (0.83) |
Unknown | 6 | 4 |
T Stage | ||
T1 | 28 / 98 (29%) | 25 / 102 (25%) |
T2 | 25 / 98 (26%) | 29 / 102 (28%) |
T3 | 22 / 98 (22%) | 21 / 102 (21%) |
T4 | 23 / 98 (23%) | 27 / 102 (26%) |
Grade | ||
I | 35 / 98 (36%) | 33 / 102 (32%) |
II | 32 / 98 (33%) | 36 / 102 (35%) |
III | 31 / 98 (32%) | 33 / 102 (32%) |
Tumor Response | 28 / 95 (29%) | 33 / 98 (34%) |
Unknown | 3 | 4 |
Patient Died | 52 / 98 (53%) | 60 / 102 (59%) |
Months to Death/Censor | 20.2 (5.0) | 19.0 (5.5) |
¹ Mean (SD); n / N (%) |
# Build a publication-style table from the reduced data set.
tbl_summary(trial2, by = trt) %>%
  # Extra columns: p-value, overall summary, and N per variable.
  add_p() %>%
  add_overall() %>%
  add_n() %>%
  # Cosmetic changes: column header, spanning header over the two treatment
  # columns, footnote text, caption, and bold variable labels.
  modify_header(label ~ "**Variable**") %>%
  modify_spanning_header(
    c("stat_1", "stat_2") ~ "**Treatment Received**"
  ) %>%
  modify_footnote(all_stat_cols() ~ "Median (IQR) or Frequency (%)") %>%
  modify_caption("**Table 1. Patient Characteristics**") %>%
  bold_labels()
Variable | N | Overall N = 200¹ | Treatment Received: Drug A N = 98¹ | Treatment Received: Drug B N = 102¹ | p-value² |
---|---|---|---|---|---|
Age | 189 | 47 (38, 57) | 46 (37, 60) | 48 (39, 56) | 0.7 |
Unknown | | 11 | 7 | 4 | |
Grade | 200 | | | | 0.9 |
I | | 68 (34%) | 35 (36%) | 33 (32%) | |
II | | 68 (34%) | 32 (33%) | 36 (35%) | |
III | | 64 (32%) | 31 (32%) | 33 (32%) | |
¹ Median (IQR) or Frequency (%) | | | | | |
² Wilcoxon rank sum test; Pearson’s Chi-squared test | | | | | |
# Hide the "Unknown" rows, add an N column, then convert to a gt table so that
# gt functions (here a markdown source note) can be applied.
tbl_summary(trial2, by = trt, missing = "no") %>%
  add_n() %>%
  as_gt() %>%
  gt::tab_source_note(
    gt::md("*This data is simulated*")
  )
Characteristic | N | Drug A N = 98¹ | Drug B N = 102¹ |
---|---|---|---|
Age | 189 | 46 (37, 60) | 48 (39, 56) |
Grade | 200 | | |
I | | 35 (36%) | 33 (32%) |
II | | 32 (33%) | 36 (35%) |
III | | 31 (32%) | 33 (32%) |
This data is simulated | | | |
¹ Median (Q1, Q3); n (%) | | | |
# Multi-row summary of age by treatment arm: non-missing count, median with
# quartiles, and range, with the p-value formatted by style_pvalue(digits = 2).
select(trial2, age, trt) %>%
  tbl_summary(
    by = trt,
    # "continuous2" puts each statistic on its own row.
    type = all_continuous() ~ "continuous2",
    statistic = all_continuous() ~ c(
      "{N_nonmiss}",
      "{median} ({p25}, {p75})",
      "{min}, {max}"
    ),
    missing = "no"
  ) %>%
  add_p(
    pvalue_fun = function(x) style_pvalue(x, digits = 2)
  )
Characteristic | Drug A N = 98 | Drug B N = 102 | p-value¹ |
---|---|---|---|
Age | | | 0.72 |
N Non-missing | 91 | 98 | |
Median (Q1, Q3) | 46 (37, 60) | 48 (39, 56) | |
Min, Max | 6, 78 | 9, 83 | |
¹ Wilcoxon rank sum test | | | |
Using the gtsummary package, we generated descriptive statistics, cross tables, p-values, and well-formatted summary tables.