# tbl_summary(
# data,
# by = NULL,
# label = NULL, # 默认标签
# statistic = list(all_continuous() ~ "{median} ({p25}, {p75})", all_categorical() ~"{n} ({p}%)"), # 统计汇总信息显示
# digits = NULL, # 小数位数
# type = NULL, # c("continuous", "continuous2", "categorical", "dichotomous")
# value = NULL,
# missing = c("ifany", "no", "always"),
# missing_text = "Unknown",
# missing_stat = "{N_miss}",
# sort = all_categorical(FALSE) ~ "alphanumeric",# c("alphanumeric", "frequency")
# percent = c("column", "row", "cell"),
# include = everything()
# )
gtsummary包
{gtsummary}
包
{gtsummary}
包 (Package index) 提供了一种优雅而灵活的方法,可以使用R编程语言创建可发布的分析和汇总表。{gtsummary}
包使用具有高度可定制功能的合理默认值汇总数据集、回归模型等。
1.{tbl_summary()}
绘制 Table 1
tbl_summary()
"dichotomous"
“二分”分类变量显示在单行上,而不是每个变量水平显示一行。编码为TRUE/FALSE、0/1或yes/no的变量被假定为二分的,并且显示TRUE、1和yes行。否则,必须在value参数中指定要显示的值,例如value = list(varname ~ "level to show")
在R中轻松总结数据帧或数据块。非常适合呈现描述性统计数据,比较群体人口统计数据(例如为医学期刊创建Table 1)等。自动检测数据集中的连续变量、分类变量和二分变量,计算适当的描述性统计量,还包括每个变量中的缺失量。
tbl_summary()
函数计算R中连续、分类和二分变量的描述性统计量,并将结果显示在一个漂亮的、可定制的汇总表中,以供发布(例如,Table 1或 demographic tables.)
1.1 Set up 设置
# install.packages("gtsummary")
library(gtsummary)
1.2 Example data set 示例数据集
该数据集包含来自200名接受两种类型化疗(药物A或药物B)之一的患者的数据。结果是肿瘤缓解和死亡。
数据框中的每个变量都通过 labelled 包被分配了一个标签属性(即 attr(trial$trt, "label") ==
"Chemotherapy Treatment"
)。默认情况下,这些标签显示在{gtsummary}
输出表中。在没有标签的数据框上使用{gtsummary}
只会打印变量名称来代替变量标签;还有一个选项可以在以后添加标签。
head(trial)
# A tibble: 6 × 8
trt age marker stage grade response death ttdeath
<chr> <dbl> <dbl> <fct> <fct> <int> <int> <dbl>
1 Drug A 23 0.16 T1 II 0 0 24
2 Drug B 9 1.11 T2 I 1 0 24
3 Drug A 31 0.277 T1 II 0 0 24
4 Drug A NA 2.07 T3 III 1 1 17.6
5 Drug A 51 2.77 T4 III 1 1 16.4
6 Drug B 39 0.613 T4 I 0 1 15.6
为了简洁起见,在本教程中,我们将使用试验数据集中的一个变量子集。
# Keep only the three columns used throughout this tutorial.
trial2 <- trial |> select(trt, age, grade)
1.3 Basic Usage 基础用法
从试验数据集创建一个汇总统计表。tbl_summary()
函数至少可以将数据框作为唯一输入,并返回数据框中每列的描述性统计信息。
# Summarise every column of trial2 with the default settings.
trial2 |> tbl_summary()
Characteristic | N = 2001 |
---|---|
Chemotherapy Treatment | |
Drug A | 98 (49%) |
Drug B | 102 (51%) |
Age | 47 (38, 57) |
Unknown | 11 |
Grade | |
I | 68 (34%) |
II | 68 (34%) |
III | 64 (32%) |
1 n (%); Median (Q1, Q3) |
注意这个基本用法的合理默认值;每个默认值都可以自定义。
自动检测变量类型,以便计算适当的描述性统计量。
数据集中的标签属性将自动打印。
缺失值在表中列为“Unknown”。
变量水平缩进并添加脚注。
对于本研究数据,应按治疗组划分汇总统计量,可使用by=
参数进行划分。若要比较两个或多个组,请在函数调用中包含add_p()
,它会检测变量类型并使用适当的统计测试。
# Split the summary by treatment group and add a p-value column.
trial2 |>
  tbl_summary(by = trt) |>
  add_p()
Characteristic | Drug A N = 981 |
Drug B N = 1021 |
p-value2 |
---|---|---|---|
Age | 46 (37, 60) | 48 (39, 56) | 0.7 |
Unknown | 7 | 4 | |
Grade | 0.9 | ||
I | 35 (36%) | 33 (32%) | |
II | 32 (33%) | 36 (35%) | |
III | 31 (32%) | 33 (32%) | |
1 Median (Q1, Q3); n (%) | |||
2 Wilcoxon rank sum test; Pearson’s Chi-squared test |
1.4 Customize Output 自定义输出
有四种主要方法可以自定义汇总表的输出
使用
tbl_summary()
函数参数使用
add_*()
函数向汇总表中添加其他数据/信息使用
{gtsummary}
函数修改汇总表外观使用
{gt}
包函数修改表外观
1.4.1 修改 tbl_summary()
函数参数
tbl_summary()
函数包含许多用于修改外观的输入选项。
修改tbl_summary()
参数的示例
# Customise the table through tbl_summary() arguments.
trial2 |>
  tbl_summary(
    by = trt, # grouping variable
    statistic = list( # display format of the summary statistics
      all_continuous() ~ "{mean} ({sd})", # continuous variables
      all_categorical() ~ "{n} / {N} ({p}%)" # categorical variables
    ),
    digits = all_continuous() ~ 2, # number of decimal places
    label = grade ~ "Tumor Grade", # relabel the variable
    missing_text = "(Missing)" # text shown on the missing-value row
  )
Characteristic | Drug A N = 981 |
Drug B N = 1021 |
---|---|---|
Age | 47.01 (14.71) | 47.45 (14.01) |
(Missing) | 7 | 4 |
Tumor Grade | ||
I | 35 / 98 (36%) | 33 / 102 (32%) |
II | 32 / 98 (33%) | 36 / 102 (35%) |
III | 31 / 98 (32%) | 33 / 102 (32%) |
1 Mean (SD); n / N (%) |
有多种方法可以使用单个公式、公式列表和命名列表来指定 statistic=
参数。下表显示了指定连续变量年龄和标记的均值统计量的等效方法。任何接受公式的 {gtsummary}
函数参数都将接受这些变量中的每一个。
Select with Helpers eg:
all_continuous() ~ "{mean}"
Select by Variable Name eg:
c(age, marker) ~ "{mean}"
Select with Named List eg:
list(age = "{mean}", marker = "{mean}")
1.4.2 {gtsummary}
函数添加信息
{gtsummary}
包具有向tbl_summary()
表添加信息或统计信息的函数。
# add_overall(
# x, # gtsummary
# last = FALSE, # 是否显示在最后一列
# col_label = "**Overall** \nN = {style_number(N)}", # 汇总列显示的列名,默认
# statistic = NULL, # 调用的统计参数,默认为 NULL/ ~"{p}% (n={n})" ~ c(1,0)
# digits = NULL, # 小数位数
# ...
# )
1.4.3 {gtsummary}
用于格式化表格的函数
{gtsummary}
包附带了专门用于修改和格式化汇总表的函数。
添加tbl_summary()
系列函数的示例
# Extend and format the summary table with add_*() / modify_*() helpers.
trial2 |>
  tbl_summary(by = trt) |> # grouping variable
  add_p(pvalue_fun = label_style_pvalue(digits = 2)) |>
  add_overall() |> # add an overall column
  add_n() |> # add a count column
  modify_header(label ~ "**Variable**") |> # column header
  modify_spanning_header(c("stat_1", "stat_2") ~ "**Treatment Received**") |>
  modify_footnote( # footnote
    all_stat_cols() ~ "Median (IQR) or Frequency (%)"
  ) |>
  modify_caption("**Table 1. Patient Characteristics**") |> # caption
  bold_labels() # bold variable labels
Variable | N | Overall N = 2001 |
Treatment Received
|
p-value2 | |
---|---|---|---|---|---|
Drug A N = 981 |
Drug B N = 1021 |
||||
Age | 189 | 47 (38, 57) | 46 (37, 60) | 48 (39, 56) | 0.72 |
Unknown | 11 | 7 | 4 | ||
Grade | 200 | 0.87 | |||
I | 68 (34%) | 35 (36%) | 33 (32%) | ||
II | 68 (34%) | 32 (33%) | 36 (35%) | ||
III | 64 (32%) | 31 (32%) | 33 (32%) | ||
1 Median (IQR) or Frequency (%) | |||||
2 Wilcoxon rank sum test; Pearson’s Chi-squared test |
1.4.4 使用{gt}
包函数修改表外观
{gt}
包包含了许多修改表格输出的强大功能。
要将 {gt}
包函数用于 {gtsummary}
表,必须先将汇总表转换为 gt
对象。为此,请在使用 {gtsummary}
函数完成修改后使用 as_gt()
函数。
# Convert to a gt object first, then apply {gt} functions to it.
trial2 |>
  tbl_summary(by = trt, missing = "no") |>
  add_n() |>
  as_gt() |>
  gt::tab_source_note(gt::md("*This data is simulated*"))
Characteristic | N | Drug A N = 981 |
Drug B N = 1021 |
---|---|---|---|
Age | 189 | 46 (37, 60) | 48 (39, 56) |
Grade | 200 | ||
I | 35 (36%) | 33 (32%) | |
II | 32 (33%) | 36 (35%) | |
III | 31 (32%) | 33 (32%) | |
This data is simulated | |||
1 Median (Q1, Q3); n (%) |
1.5 Select Helpers 选择助手
- 整个
tidyverse
中可用的所有{tidyselect}
帮助程序,例如starts_with()
、contains()
和everything()
(即任何可以与dplyr::select()
函数一起使用的东西),都可以与{gtsummary}
一起使用。 - 包中包含的其他
{gtsummary}
选择器,用于补充tidyselect
功能:Summary type
# all_continuous() # all_categorical()
1.6 Multi-line Continuous Summaries 多行连续摘要
连续变量也可以在多行上进行汇总-这是某些期刊中的常见格式。要更新连续变量以在多行上汇总,请将汇总类型更新为"continuous2"
(用于两行或多行上的汇总)。
# Summarise continuous variables on several rows ("continuous2" type).
trial2 |>
  select(age, trt) |> # keep the age and trt columns
  tbl_summary(
    by = trt, # grouping variable
    type = all_continuous() ~ "continuous2", # multi-row continuous summary
    statistic = all_continuous() ~ c( # one statistic string per row
      "{N_nonmiss}",
      "{median} ({p25}, {p75})",
      "{min}, {max}"
    ),
    missing = "no" # do not show a missing-value row
  ) |>
  add_p(pvalue_fun = label_style_pvalue(digits = 2)) # p-value column
Characteristic | Drug A N = 98 |
Drug B N = 102 |
p-value1 |
---|---|---|---|
Age | 0.72 | ||
N Non-missing | 91 | 98 | |
Median (Q1, Q3) | 46 (37, 60) | 48 (39, 56) | |
Min, Max | 6, 78 | 9, 83 | |
1 Wilcoxon rank sum test |
1.7 Advanced Customization 高级定制
适用于所有{gtsummary}
对象
{gtsummary}
表有两个重要的内部对象:
当您将 tbl_summary()
函数的输出打印到R控制台或 R markdown文档中时,.$table_body
数据帧使用.$table_styling
中列出的说明进行格式化.默认输出使用 as_gt()
通过在.$table_body
上执行的一系列{gt}
命令将{gtsummary}
对象转换为{gt}
对象,以下是使用tbl_summary()
保存的前几个{gt}调用:
tbl_summary(trial2) |>
as_gt(return_calls = TRUE) |>
head(n = 4)
$gt
gt::gt(data = x$table_body, groupname_col = NULL, caption = NULL)
$fmt_missing
$fmt_missing[[1]]
gt::sub_missing(columns = gt::everything(), missing_text = "")
$cols_merge
list()
$cols_align
$cols_align[[1]]
gt::cols_align(columns = c("variable", "var_type", "row_type",
"var_label", "stat_0"), align = "center")
$cols_align[[2]]
gt::cols_align(columns = "label", align = "left")
#> $gt
#> gt::gt(data = x$table_body, groupname_col = NULL, caption = NULL)
#>
#> $fmt_missing
#> $fmt_missing[[1]]
#> gt::sub_missing(columns = gt::everything(), missing_text = "")
#>
#>
#> $cols_merge
#> list()
#>
#> $cols_align
#> $cols_align[[1]]
#> gt::cols_align(columns = c("variable", "var_type", "row_type",
#> "var_label", "stat_0"), align = "center")
#>
#> $cols_align[[2]]
#> gt::cols_align(columns = "label", align = "left")
{gt}函数按它们出现的顺序调用,从 gt::gt()
开始。
如果不希望运行特定的{gt}
函数(即希望更改默认输出格式),则可以在as_gt()
函数中排除任何{gt}
调用。在下面的示例中,将恢复默认对齐方式。
运行as_gt()
函数后,可以使用{gt}
函数向表中添加其他格式。在下面的示例中,源注释被添加到表中:
# Skip the default cols_align call, then add a source note with {gt}.
tbl_summary(trial2, by = trt) |>
  as_gt(include = -cols_align) |>
  gt::tab_source_note(gt::md("*This data is simulated*"))
Characteristic | Drug A N = 981 |
Drug B N = 1021 |
---|---|---|
Age | 46 (37, 60) | 48 (39, 56) |
Unknown | 7 | 4 |
Grade | ||
I | 35 (36%) | 33 (32%) |
II | 32 (33%) | 36 (35%) |
III | 31 (32%) | 33 (32%) |
This data is simulated | ||
1 Median (Q1, Q3); n (%) |
1.8 Set Default Options with Themes 使用主题设置默认选项
# set_gtsummary_theme(x, quiet)
#
# reset_gtsummary_theme()
#
# get_gtsummary_theme()
#
# with_gtsummary_theme(
# x,
# expr,
# env = rlang::caller_env(),
# msg_ignored_elements = NULL
# )
#
# check_gtsummary_theme(x)
# # Setting JAMA theme for gtsummary
# set_gtsummary_theme(theme_gtsummary_journal("jama"))
# # Themes can be combined by including more than one
# set_gtsummary_theme(theme_gtsummary_compact())
#
# set_gtsummary_theme_ex1 <-
# trial |>
# tbl_summary(by = trt, include = c(age, grade, trt)) |>
# add_stat_label() |>
# as_gt()
#
# # reset gtsummary theme
# reset_gtsummary_theme()
1.9 Survey Data 调查数据
{gtsummary}
包还通过tbl_svysummary()
函数支持调查数据(使用{survey}
包创建的对象)。tbl_svysummary()
和tbl_summary()
的语法几乎相同,上面的示例也适用于调查摘要。(详情可见 :tbl_svysummary包 )
# tbl_svysummary(
# data,
# by = NULL,
# label = NULL,
# statistic = list(all_continuous() ~ "{median} ({p25}, {p75})", all_categorical() ~
# "{n} ({p}%)"),
# digits = NULL,
# type = NULL,
# value = NULL,
# missing = c("ifany", "no", "always"),
# missing_text = "Unknown",
# missing_stat = "{N_miss}",
# sort = all_categorical(FALSE) ~ "alphanumeric",
# percent = c("column", "row", "cell"),
# include = everything()
# )
要开始,请安装{survey}
软件包并加载apiclus1
数据集。
# install.packages("survey")
# loading the api data set
data(api, package = "survey")
在我们开始之前,我们将数据框转换为调查对象,注册ID和权重列,并设置有限总体校正列。
# Build the survey design object, registering the ID column (dnum), the
# weights column (pw) and the finite population correction column (fpc).
svy_apiclus1 <-
  survey::svydesign(
    id = ~dnum,
    weights = ~pw,
    data = apiclus1,
    fpc = ~fpc
  )
创建survey
对象后,我们现在可以使用tbl_svysummary()
将其汇总为标准数据框。与tbl_summary()
类似,tbl_svysummary()
接受 by=
参数,并与 add_p()
和add_overall()
函数一起工作。
无法将自定义函数传递给 tbl_svysummary()
的statistic=
参数。您必须使用一个预定义的汇总统计函数(例如{mean}
、{median}
),这些函数利用{survey}
包中的函数来计算加权统计。
# Summarise the survey design object, split by the "both" column.
svy_apiclus1 |>
  tbl_svysummary(
    # stratify summary statistics by the "both" column
    by = both,
    # summarize a subset of the columns
    include = c(api00, api99, both),
    # adding labels to table
    label = list(
      api00 = "API in 2000",
      api99 = "API in 1999"
    )
  ) |>
  add_p() |> # comparing values by "both" column
  add_overall() |>
  # adding spanning header
  modify_spanning_header(c("stat_1", "stat_2") ~ "**Met Both Targets**")
Characteristic | Overall N = 6,1941 |
Met Both Targets
|
p-value2 | |
---|---|---|---|---|
No N = 1,6921 |
Yes N = 4,5021 |
|||
API in 2000 | 652 (552, 719) | 631 (559, 710) | 655 (551, 723) | 0.4 |
API in 1999 | 615 (512, 692) | 632 (550, 701) | 613 (499, 687) | 0.2 |
1 Median (Q1, Q3) | ||||
2 Design-based KruskalWallis test |
tbl_svysummary()
还可以处理加权调查数据,其中每行表示多个个体:
# Weighted data: each row of the Titanic table stands for `n` individuals.
Titanic |>
  as_tibble() |>
  survey::svydesign(data = _, ids = ~1, weights = ~n) |>
  tbl_svysummary(include = c(Age, Survived))
Characteristic | N = 2,2011 |
---|---|
Age | |
Adult | 2,092 (95%) |
Child | 109 (5.0%) |
Survived | 711 (32%) |
1 n (%) |
1.10 Cross Tables 交叉表
# tbl_cross(
# data,
# row = 1L,
# col = 2L,
# label = NULL,
# statistic = ifelse(percent == "none", "{n}", "{n} ({p}%)"),
# digits = NULL,
# percent = c("none", "column", "row", "cell"),
# margin = c("column", "row"),
# missing = c("ifany", "always", "no"),
# missing_text = "Unknown",
# margin_text = "Total"
# )
使用tbl_cross()
比较数据中的两个分类变量。tbl_cross()
是tbl_summary()
的包装器,它:
自动向表中添加具有比较变量的名称或标签的跨越标头
默认使用
percent =“cell”
添加行和列边距合计(可通过
margin
参数自定义)显示行变量和列变量中的缺失数据(可通过 missing 参数进行自定义)
# Cross-tabulate stage against treatment with cell percentages.
trial |>
  tbl_cross(
    row = stage,
    col = trt,
    percent = "cell"
  ) |>
  add_p()
Chemotherapy Treatment
|
Total | p-value1 | ||
---|---|---|---|---|
Drug A | Drug B | |||
T Stage | 0.9 | |||
T1 | 28 (14%) | 25 (13%) | 53 (27%) | |
T2 | 25 (13%) | 29 (15%) | 54 (27%) | |
T3 | 22 (11%) | 21 (11%) | 43 (22%) | |
T4 | 23 (12%) | 27 (14%) | 50 (25%) | |
Total | 98 (49%) | 102 (51%) | 200 (100%) | |
1 Pearson’s Chi-squared test |
2. tbl_regression() 绘制回归分析结果
# tbl_regression(x, ...)
#
# # Default S3 method
# tbl_regression(
# x, # 回归模型对象
# label = NULL, # 变量命名,如:list(age = "Age", stage = "Path T Stage")
# exponentiate = FALSE, # 是否对系数估计值取幂的逻辑,默认为FALSE
# include = everything(), # 要包含在输出中的变量,默认为All
# show_single_row = NULL, # 默认情况下,分类变量打印在多行上。如果一个变量是二分的(例如是/否),并且您希望打印 回归系数,在这里包括变量名称。
# conf.level = 0.95, # 置信区间/可信区间的置信水平
# intercept = FALSE, # 指示是否在输出中包括截距
# estimate_fun = ifelse(exponentiate, label_style_ratio(), label_style_sigfig()), # 函数对系数估计值进行舍入和格式化
# pvalue_fun = label_style_pvalue(digits = 1),
# tidy_fun = broom.helpers::tidy_with_broom_or_parameters,
# add_estimate_to_reference_rows = FALSE, # 添加参考值
# conf.int = TRUE,
# ...
# )
2.1 Setup 设置
# install.packages("gtsummary")
library(gtsummary)
2.2 Example data set 示例数据集
trial
数据框中的每个变量都被分配了一个属性标签(labelled 包)。
2.3 Basic Usage 基础用法
让我们首先创建一个逻辑回归模型,使用trial
数据集的变量年龄(age)和分期(stage)来预测肿瘤反应。
# build logistic regression model
m1 <- glm(response ~ age + stage, trial, family = binomial)

# view raw model results
summary(m1)$coefficients
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.48622424 0.62022844 -2.3962530 0.01656365
age 0.01939109 0.01146813 1.6908683 0.09086195
stageT2 -0.54142643 0.44000267 -1.2305071 0.21850725
stageT3 -0.05953479 0.45042027 -0.1321761 0.89484501
stageT4 -0.23108633 0.44822835 -0.5155549 0.60616530
然后,我们将使用一个回归模型表来总结并显示这些结果,只需使用{gtsummary}
中的一行代码。
tbl_regression(m1, exponentiate = TRUE)
Characteristic | OR1 | 95% CI1 | p-value |
---|---|---|---|
Age | 1.02 | 1.00, 1.04 | 0.091 |
T Stage | |||
T1 | — | — | |
T2 | 0.58 | 0.24, 1.37 | 0.2 |
T3 | 0.94 | 0.39, 2.28 | 0.9 |
T4 | 0.79 | 0.33, 1.90 | 0.6 |
1 OR = Odds Ratio, CI = Confidence Interval |
该模型被识别为logistic回归,系数取幂,因此标题显示比值比为“OR”
系统会自动检测变量类型,并为分类变量添加Reference行
对模型估计值和置信区间进行四舍五入和格式化
由于数据集中的变量已被标记,因此这些标记将被带入
{gtsummary}
输出表。如果数据没有被标记,默认是显示变量名。变量水平缩进并添加脚注
2.4 Customize Output 自定义输出
2.4.1 Modifying function arguments 修改函数参数
2.4.2 {gtsummary}
functions to add information
2.4.3 {gtsummary}
functions to format table {gtsummary}用于格式化表格的函数
2.4.4 {gt}
functions to format table {gt}
函数格式化表格
# Convert the regression table to gt, then add a source note.
m1 |>
  tbl_regression(exponentiate = TRUE) |>
  as_gt() |>
  gt::tab_source_note(gt::md("*This data is simulated*"))
Characteristic | OR1 | 95% CI1 | p-value |
---|---|---|---|
Age | 1.02 | 1.00, 1.04 | 0.091 |
T Stage | |||
T1 | — | — | |
T2 | 0.58 | 0.24, 1.37 | 0.2 |
T3 | 0.94 | 0.39, 2.28 | 0.9 |
T4 | 0.79 | 0.33, 1.90 | 0.6 |
This data is simulated | |||
1 OR = Odds Ratio, CI = Confidence Interval |
例如:
对系数取幂以给出比值比
报告阶段的总体p值-较大的p值四舍五入至两位小数
小于0.10的P值为粗体-变量标签为粗体
变量水平以斜体表示
# format results into data frame with global p-values
m1 |>
  tbl_regression(
    exponentiate = TRUE,
    pvalue_fun = label_style_pvalue(digits = 2)
  ) |>
  add_global_p() |>
  bold_p(t = 0.10) |>
  bold_labels() |>
  italicize_levels()
Characteristic | OR1 | 95% CI1 | p-value |
---|---|---|---|
Age | 1.02 | 1.00, 1.04 | 0.087 |
T Stage | 0.62 | ||
T1 | — | — | |
T2 | 0.58 | 0.24, 1.37 | |
T3 | 0.94 | 0.39, 2.28 | |
T4 | 0.79 | 0.33, 1.90 | |
1 OR = Odds Ratio, CI = Confidence Interval |
2.5 Univariate Regression 单变量回归
# tbl_uvregression(data, ...)
#
# # S3 method for class 'data.frame'
# tbl_uvregression(
# data, # 数据框
# y = NULL, # 模型结局(例如,y=复发或y=Surv(时间,复发))
# x = NULL, # 协变量(例如,x=trt 在include中指定的所有其他列将针对常数y或x进行回归
# method, # 回归方法或函数,lm、glm、survival::coxph、survey::svyglm
# method.args = list(),
# exponentiate = FALSE, # 是否对系数估计值取幂
# label = NULL, # 变量重命名
# include = everything(), # 包含在输出中的变量
# tidy_fun = broom.helpers::tidy_with_broom_or_parameters, # 默认使用broom::tidy()。 如果发生错误,则尝试使用 parameters::model_parameters(),如果已安装。
# hide_n = FALSE, # 隐藏N列
# show_single_row = NULL, # 默认情况下,分类变量打印在多行上
# conf.level = 0.95, # 置信区间/可信区间的置信水平
# estimate_fun = ifelse(exponentiate, label_style_ratio(), label_style_sigfig()),
# pvalue_fun = label_style_pvalue(digits = 1),
# formula = "{y} ~ {x}",# 模型公式的字符串
# add_estimate_to_reference_rows = FALSE,
# conf.int = TRUE,
# ...
# )
#
# # S3 method for class 'survey.design'
# tbl_uvregression(
# data,
# y = NULL,
# x = NULL,
# method,
# method.args = list(),
# exponentiate = FALSE,
# label = NULL,
# include = everything(),
# tidy_fun = broom.helpers::tidy_with_broom_or_parameters,
# hide_n = FALSE,
# show_single_row = NULL,
# conf.level = 0.95,
# estimate_fun = ifelse(exponentiate, label_style_ratio(), label_style_sigfig()),
# pvalue_fun = label_style_pvalue(digits = 1),
# formula = "{y} ~ {x}",
# add_estimate_to_reference_rows = FALSE,
# conf.int = TRUE,
# ...
# )
tbl_uvregression() 函数的作用是:生成一个单变量回归模型表。该函数是tbl_regression()的包装器,因此接受几乎相同的函数参数。可以用与tbl_regression()类似的方式修改函数的结果。
# Example 1 ----------------------------------
# Univariate logistic regressions of response on age and on grade.
tbl_uvregression(
  trial,
  method = glm,
  y = response,
  method.args = list(family = binomial),
  exponentiate = TRUE,
  include = c("age", "grade")
)
Characteristic | N | OR1 | 95% CI1 | p-value |
---|---|---|---|---|
Age | 183 | 1.02 | 1.00, 1.04 | 0.10 |
Grade | 193 | |||
I | — | — | ||
II | 0.95 | 0.45, 2.00 | 0.9 | |
III | 1.10 | 0.52, 2.29 | 0.8 | |
1 OR = Odds Ratio, CI = Confidence Interval |
# Example 2 ----------------------------------
# rounding pvalues to 2 decimal places
library(survival)
# Univariate Cox models for time to death, one per covariate in `include`.
tbl_uvregression(
  trial,
  method = coxph,
  y = Surv(ttdeath, death),
  exponentiate = TRUE,
  include = c("age", "grade", "response"),
  pvalue_fun = label_style_pvalue(digits = 2)
)
Characteristic | N | HR1 | 95% CI1 | p-value |
---|---|---|---|---|
Age | 189 | 1.01 | 0.99, 1.02 | 0.33 |
Grade | 200 | |||
I | — | — | ||
II | 1.28 | 0.80, 2.05 | 0.31 | |
III | 1.69 | 1.07, 2.66 | 0.024 | |
Tumor Response | 193 | 0.50 | 0.31, 0.78 | 0.003 |
1 HR = Hazard Ratio, CI = Confidence Interval |
# Univariate logistic regressions with global p-values, event counts
# and q-values.
trial |>
  tbl_uvregression(
    method = glm,
    y = response,
    include = c(age, grade),
    method.args = list(family = binomial),
    exponentiate = TRUE,
    pvalue_fun = label_style_pvalue(digits = 2)
  ) |>
  add_global_p() |> # add global p-value
  add_nevent() |> # add number of events of the outcome
  add_q() |> # adjusts global p-values for multiple testing
  bold_p() |> # bold p-values under a given threshold (default 0.05)
  bold_p(t = 0.10, q = TRUE) |> # now bold q-values under the threshold of 0.10
  bold_labels()
Characteristic | N | Event N | OR1 | 95% CI1 | p-value | q-value2 |
---|---|---|---|---|---|---|
Age | 183 | 58 | 1.02 | 1.00, 1.04 | 0.091 | 0.18 |
Grade | 193 | 61 | 0.93 | 0.93 | ||
I | — | — | ||||
II | 0.95 | 0.45, 2.00 | ||||
III | 1.10 | 0.52, 2.29 | ||||
1 OR = Odds Ratio, CI = Confidence Interval | ||||||
2 False discovery rate correction for multiple testing |
2.6 Setting Default Options 设置默认选项
2.7 Supported Models 已支持的模型
3. Frequently Asked Questions 常见问题
FAQ + Gallery: FAQ
3.1 Summary Tables 汇总表
3.1.1 组列上添加跨越标题以增加清晰度
modify_spanning_header(all_stat_cols() ~ "**Chemotherapy Treatment**")
# Add a spanning header above the treatment-group columns.
trial |>
  tbl_summary(
    by = trt,
    include = c(age, grade),
    missing = "no",
    statistic = all_continuous() ~ "{median} ({p25}, {p75})"
  ) |>
  modify_header(
    all_stat_cols() ~ "**{level}**  \nN = {n} ({style_percent(p)}%)"
  ) |>
  add_n() |>
  bold_labels() |>
  modify_spanning_header(all_stat_cols() ~ "**Chemotherapy Treatment**")
Characteristic | N |
Chemotherapy Treatment
|
|
---|---|---|---|
Drug A N = 98 (49%)1 |
Drug B N = 102 (51%)1 |
||
Age | 189 | 46 (37, 60) | 48 (39, 56) |
Grade | 200 | ||
I | 35 (36%) | 33 (32%) | |
II | 32 (33%) | 36 (35%) | |
III | 31 (32%) | 33 (32%) | |
1 Median (Q1, Q3); n (%) |
3.1.2 在多行上显示
# Show several statistics per continuous variable, one per row.
trial |>
  tbl_summary(
    by = trt,
    include = c(age, marker),
    type = all_continuous() ~ "continuous2",
    statistic =
      all_continuous() ~ c(
        "{N_nonmiss}",
        "{mean} ({sd})",
        "{median} ({p25}, {p75})",
        "{min}, {max}"
      ),
    missing = "no"
  ) |>
  italicize_levels()
Characteristic | Drug A N = 98 |
Drug B N = 102 |
---|---|---|
Age | ||
N Non-missing | 91 | 98 |
Mean (SD) | 47 (15) | 47 (14) |
Median (Q1, Q3) | 46 (37, 60) | 48 (39, 56) |
Min, Max | 6, 78 | 9, 83 |
Marker Level (ng/mL) | ||
N Non-missing | 92 | 98 |
Mean (SD) | 1.02 (0.89) | 0.82 (0.83) |
Median (Q1, Q3) | 0.84 (0.23, 1.60) | 0.52 (0.18, 1.21) |
Min, Max | 0.00, 3.87 | 0.01, 3.64 |
3.1.3 修改格式化p值的函数,更改变量标签
# Recode response as a labelled factor, relabel the variables and format
# p-values with two digits.
trial |>
  mutate(
    response = factor(
      response,
      labels = c("No Tumor Response", "Tumor Responded")
    )
  ) |>
  tbl_summary(
    by = response,
    include = c(age, grade),
    missing = "no",
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
  ) |>
  add_p(pvalue_fun = label_style_pvalue(digits = 2)) |>
  add_q()
7 missing rows in the "response" column have been removed.
Characteristic | No Tumor Response N = 1321 |
Tumor Responded N = 611 |
p-value2 | q-value3 |
---|---|---|---|---|
Patient Age | 46 (36, 55) | 49 (43, 59) | 0.091 | 0.18 |
Tumor Grade | 0.93 | 0.93 | ||
I | 46 (35%) | 21 (34%) | ||
II | 44 (33%) | 19 (31%) | ||
III | 42 (32%) | 21 (34%) | ||
1 Median (Q1, Q3); n (%) | ||||
2 Wilcoxon rank sum test; Pearson’s Chi-squared test | ||||
3 False discovery rate correction for multiple testing |
3.1.4 使用 forcats::fct_na_value_to_level
将缺失值纳入
# Turn missing responses into an explicit factor level so that they get
# their own column in the table.
trial |>
  mutate(
    response =
      factor(response, labels = c("No Tumor Response", "Tumor Responded")) |>
        forcats::fct_na_value_to_level(level = "Missing Response Status")
  ) |>
  tbl_summary(
    by = response,
    include = c(age, grade),
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
  )
Characteristic | No Tumor Response N = 1321 |
Tumor Responded N = 611 |
Missing Response Status N = 71 |
---|---|---|---|
Patient Age | 46 (36, 55) | 49 (43, 59) | 52 (42, 57) |
Unknown | 7 | 3 | 1 |
Tumor Grade | |||
I | 46 (35%) | 21 (34%) | 1 (14%) |
II | 44 (33%) | 19 (31%) | 5 (71%) |
III | 42 (32%) | 21 (34%) | 1 (14%) |
1 Median (Q1, Q3); n (%) |
3.1.5 报告两组之间的治疗差异
# Report the difference between the two treatment groups.
trial |>
  tbl_summary(
    by = trt,
    include = c(response, marker),
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{p}%"
    ),
    missing = "no"
  ) |>
  add_difference() |>
  add_n() |>
  modify_header(all_stat_cols() ~ "**{level}**")
Characteristic | N | Drug A1 | Drug B1 | Difference2 | 95% CI2,3 | p-value2 |
---|---|---|---|---|---|---|
Tumor Response | 193 | 29% | 34% | -4.2% | -18%, 9.9% | 0.6 |
Marker Level (ng/mL) | 190 | 1.02 (0.89) | 0.82 (0.83) | 0.20 | -0.05, 0.44 | 0.12 |
1 | ||||||
2 2-sample test for equality of proportions with continuity correction; Welch Two Sample t-test | ||||||
3 CI = Confidence Interval |
3.1.6 Paired t-test and McNemar’s test.
# imagine that each patient received Drug A and Drug B (adding ID showing
# their paired measurements)
trial_paired <-
  trial |>
  select(trt, marker, response) |>
  mutate(.by = trt, id = dplyr::row_number())
# you must first delete incomplete pairs from the data, then you can build
# the table
trial_paired |>
  # delete missing values
  tidyr::drop_na() |>
  # keep IDs with both measurements
  dplyr::filter(.by = id, dplyr::n() == 2) |>
  # summarize data
  tbl_summary(by = trt, include = -id) |>
  add_p(
    test = list(
      marker ~ "paired.t.test",
      response ~ "mcnemar.test"
    ),
    group = id
  )
Characteristic | Drug A N = 831 |
Drug B N = 831 |
p-value2 |
---|---|---|---|
Marker Level (ng/mL) | 0.82 (0.22, 1.71) | 0.53 (0.17, 1.31) | 0.2 |
Tumor Response | 21 (25%) | 28 (34%) | 0.3 |
1 Median (Q1, Q3); n (%) | |||
2 Paired t-test; McNemar’s Chi-squared test with continuity correction |
3.1.7 将所有组与单个参考组进行比较的p值
# table summarizing data with no p-values
small_trial <- trial |> select(grade, age, response)
t0 <- small_trial |>
  tbl_summary(by = grade, missing = "no") |>
  modify_header(all_stat_cols() ~ "**{level}**")
# table comparing grade I and II
t1 <- small_trial |>
  dplyr::filter(grade %in% c("I", "II")) |>
  tbl_summary(by = grade, missing = "no") |>
  add_p() |>
  modify_header(p.value ~ "**I vs. II**") |>
  # hide summary stat columns
  modify_column_hide(all_stat_cols())
# table comparing grade I and III
t2 <- small_trial |>
  dplyr::filter(grade %in% c("I", "III")) |>
  tbl_summary(by = grade, missing = "no") |>
  add_p() |>
  modify_header(p.value = "**I vs. III**") |>
  # hide summary stat columns
  modify_column_hide(all_stat_cols())
# merging the 3 tables together, and adding additional gt formatting
tbl_merge(list(t0, t1, t2)) |>
modify_spanning_header(
all_stat_cols() ~ "**Tumor Grade**",
starts_with("p.value") ~ "**p-values**"
)
Characteristic |
Tumor Grade
|
p-values
|
|||
---|---|---|---|---|---|
I1 | II1 | III1 | I vs. II2 | I vs. III2 | |
Age | 47 (37, 56) | 49 (37, 57) | 47 (38, 58) | 0.7 | 0.5 |
Tumor Response | 21 (31%) | 19 (30%) | 21 (33%) | >0.9 | 0.9 |
1 Median (Q1, Q3); n (%) | |||||
2 Wilcoxon rank sum test; Fisher’s exact test |
3.1.8 多个变量分层的汇总表
# Build one treatment-split summary table per tumor grade.
trial |>
  select(trt, grade, age, stage) |>
  mutate(grade = paste("Grade", grade)) |>
  tbl_strata(
    strata = grade,
    ~ .x |>
      tbl_summary(by = trt, missing = "no") |>
      modify_header(all_stat_cols() ~ "**{level}**")
  )
Characteristic |
Grade I
|
Grade II
|
Grade III
|
|||
---|---|---|---|---|---|---|
Drug A1 | Drug B1 | Drug A1 | Drug B1 | Drug A1 | Drug B1 | |
Age | 46 (36, 60) | 48 (42, 55) | 45 (31, 55) | 51 (42, 58) | 52 (42, 61) | 45 (36, 52) |
T Stage | ||||||
T1 | 8 (23%) | 9 (27%) | 14 (44%) | 9 (25%) | 6 (19%) | 7 (21%) |
T2 | 8 (23%) | 10 (30%) | 8 (25%) | 9 (25%) | 9 (29%) | 10 (30%) |
T3 | 11 (31%) | 7 (21%) | 5 (16%) | 6 (17%) | 6 (19%) | 8 (24%) |
T4 | 8 (23%) | 7 (21%) | 5 (16%) | 12 (33%) | 10 (32%) | 8 (24%) |
1 Median (Q1, Q3); n (%) |
3.2 Regression Tables 回归表
3.2.1 单变量回归表中包括观察数和事件数
# Univariate regression table that also reports the number of events.
trial |>
  tbl_uvregression(
    method = glm,
    y = response,
    include = c(age, grade),
    method.args = list(family = binomial),
    exponentiate = TRUE
  ) |>
  add_nevent()
Characteristic | N | Event N | OR1 | 95% CI1 | p-value |
---|---|---|---|---|---|
Age | 183 | 58 | 1.02 | 1.00, 1.04 | 0.10 |
Grade | 193 | 61 | |||
I | — | — | |||
II | 0.95 | 0.45, 2.00 | 0.9 | ||
III | 1.10 | 0.52, 2.29 | 0.8 | ||
1 OR = Odds Ratio, CI = Confidence Interval |
3.2.2 包括两个相关的模型并排与描述性统计
# Logistic regression table for tumor response.
gt_r1 <- glm(response ~ trt + grade, trial, family = binomial) |>
  tbl_regression(exponentiate = TRUE)
# Cox regression table for time to death.
gt_r2 <- survival::coxph(
  survival::Surv(ttdeath, death) ~ trt + grade,
  trial
) |>
  tbl_regression(exponentiate = TRUE)
# Descriptive table of the same covariates, with its footnote removed.
gt_t1 <- trial |>
  tbl_summary(include = c(trt, grade), missing = "no") |>
  add_n() |>
  modify_header(stat_0 = "**n (%)**") |>
  modify_footnote(stat_0 = NA_character_)
theme_gtsummary_compact()
Setting theme "Compact"
tbl_merge(
list(gt_t1, gt_r1, gt_r2),
tab_spanner = c(NA_character_, "**Tumor Response**", "**Time to Death**")
)
Characteristic | N | n (%) |
Tumor Response
|
Time to Death
|
||||
---|---|---|---|---|---|---|---|---|
OR1 | 95% CI1 | p-value | HR1 | 95% CI1 | p-value | |||
Chemotherapy Treatment | 200 | |||||||
Drug A | 98 (49%) | — | — | — | — | |||
Drug B | 102 (51%) | 1.21 | 0.66, 2.24 | 0.5 | 1.25 | 0.86, 1.81 | 0.2 | |
Grade | 200 | |||||||
I | 68 (34%) | — | — | — | — | |||
II | 68 (34%) | 0.94 | 0.44, 1.98 | 0.9 | 1.28 | 0.80, 2.06 | 0.3 | |
III | 64 (32%) | 1.09 | 0.52, 2.27 | 0.8 | 1.69 | 1.07, 2.66 | 0.024 | |
1 OR = Odds Ratio, CI = Confidence Interval, HR = Hazard Ratio |
3.2.4 包括分类预测因子每个水平的事件数量
# Report the number of events for every level of each categorical predictor.
trial |>
  tbl_uvregression(
    method = survival::coxph,
    y = survival::Surv(ttdeath, death),
    include = c(stage, grade),
    exponentiate = TRUE,
    hide_n = TRUE
  ) |>
  add_nevent(location = "level")
Characteristic | Event N | HR1 | 95% CI1 | p-value |
---|---|---|---|---|
T Stage | ||||
T1 | 24 | — | — | |
T2 | 27 | 1.18 | 0.68, 2.04 | 0.6 |
T3 | 22 | 1.23 | 0.69, 2.20 | 0.5 |
T4 | 39 | 2.48 | 1.49, 4.14 | <0.001 |
Grade | ||||
I | 33 | — | — | |
II | 36 | 1.28 | 0.80, 2.05 | 0.3 |
III | 43 | 1.69 | 1.07, 2.66 | 0.024 |
1 HR = Hazard Ratio, CI = Confidence Interval |
3.2.5 回归模型,其中协变量保持不变,结果发生变化
# Keep the covariate (trt) fixed and vary the outcome across models.
trial |>
  tbl_uvregression(
    method = lm,
    x = trt,
    show_single_row = "trt",
    hide_n = TRUE,
    include = c(age, marker)
  ) |>
  modify_header(
    label = "**Model Outcome**",
    estimate = "**Treatment Coef.**"
  ) |>
  modify_footnote(
    estimate = "Values larger than 0 indicate larger values in the Drug B group."
  )
Model Outcome | Treatment Coef.1 | 95% CI2 | p-value |
---|---|---|---|
Age | 0.44 | -3.7, 4.6 | 0.8 |
Marker Level (ng/mL) | -0.20 | -0.44, 0.05 | 0.12 |
1 Values larger than 0 indicate larger values in the Drug B group. | |||
2 CI = Confidence Interval |
3.2.6 在p值较低的估计值上使用显著性星号
# Mark estimates that have low p-values with significance stars.
trial |>
  tbl_uvregression(
    method = survival::coxph,
    y = survival::Surv(ttdeath, death),
    include = c(stage, grade),
    exponentiate = TRUE
  ) |>
  add_significance_stars()
Characteristic | N | HR1,2 | SE2 |
---|---|---|---|
T Stage | 200 | ||
T1 | — | — | |
T2 | 1.18 | 0.281 | |
T3 | 1.23 | 0.295 | |
T4 | 2.48*** | 0.260 | |
Grade | 200 | ||
I | — | — | |
II | 1.28 | 0.241 | |
III | 1.69* | 0.232 | |
1 *p<0.05; **p<0.01; ***p<0.001 | |||
2 HR = Hazard Ratio, SE = Standard Error |