Introduction

The {gtsummary} package provides an elegant and flexible way to create publication-ready analytical and summary tables using the R programming language. The {gtsummary} package summarizes data sets, regression models, and more, using sensible defaults with highly customizable capabilities.

{gtsummary} Overview

  • Create tabular summaries with sensible defaults that remain highly customizable
  • Types of summaries:
    • One-way tables
    • Two-way tables
    • Cross-tabulation
    • Regression models
    • Survival data
    • Survey data
    • Custom tables
  • Report statistics from {gtsummary} tables inline in R Markdown
  • Stack and/or merge any table type
  • Use themes to standardize across tables
  • Choose from different print engines

Getting Started

# Install the released version of {gtsummary} from CRAN
install.packages(pkgs = "gtsummary")

# Alternatively, install the development version from GitHub
# (this call needs the {remotes} package to be installed already)
remotes::install_github(repo = "ddsjoberg/gtsummary")

Load Packages

# Attach the packages used in this tutorial (attach order is kept as-is)
library(tidyverse)
library(gtsummary)
library(kableExtra)
library(gt)

# Read the lung-capacity data and show its first six rows as a gt table
lungcap <- read.csv("data/LungCapData.csv")
gt(head(lungcap))
LungCap Age Height Smoke Gender Caesarean
6.475 6 62.1 no male no
10.125 18 74.7 yes female no
9.550 16 69.7 no female yes
11.125 14 71.0 no male no
4.800 5 56.9 no male no
6.225 11 58.7 no female no

Basic Summary Tables using tbl_summary()

Summary Tables

# Summarise treatment, age, grade, and response for the whole trial data set,
# calling tbl_summary() with its default settings
select(trial, trt, age, grade, response) %>%
  tbl_summary()
Characteristic N = 200¹
Chemotherapy Treatment
Drug A 98 (49%)
Drug B 102 (51%)
Age 47 (38, 57)
Unknown 11
Grade
I 68 (34%)
II 68 (34%)
III 64 (32%)
Tumor Response 61 (32%)
Unknown 7
1 n (%); Median (IQR)
# Stratify the summary by treatment arm: one column per level of trt
select(trial, trt, age, grade, response) %>%
  tbl_summary(by = trt)
Characteristic Drug A, N = 98¹ Drug B, N = 102¹
Age 46 (37, 59) 48 (39, 56)
Unknown 7 4
Grade
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)
Tumor Response 28 (29%) 33 (34%)
Unknown 3 4
1 Median (IQR); n (%)
# Hide the "Unknown" rows: missing = "no" drops the missing-value counts
select(trial, trt, age, grade, response) %>%
  tbl_summary(by = trt, missing = "no")
Characteristic Drug A, N = 98¹ Drug B, N = 102¹
Age 46 (37, 59) 48 (39, 56)
Grade
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)
Tumor Response 28 (29%) 33 (34%)
1 Median (IQR); n (%)
# add_n() appends an N column with the number of non-missing observations
# for each variable
select(trial, trt, age, grade, response) %>%
  tbl_summary(by = trt, missing = "no") %>%
  add_n()
Characteristic N Drug A, N = 98¹ Drug B, N = 102¹
Age 189 46 (37, 59) 48 (39, 56)
Grade 200
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)
Tumor Response 193 28 (29%) 33 (34%)
1 Median (IQR); n (%)
# add_p() appends a p-value column that tests each variable for a difference
# between the treatment groups
select(trial, trt, age, grade, response) %>%
  tbl_summary(by = trt, missing = "no") %>%
  add_n() %>%
  add_p()
Characteristic N Drug A, N = 98¹ Drug B, N = 102¹ p-value²
Age 189 46 (37, 59) 48 (39, 56) 0.7
Grade 200 0.9
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)
Tumor Response 193 28 (29%) 33 (34%) 0.5
1 Median (IQR); n (%)
2 Wilcoxon rank sum test; Pearson's Chi-squared test
# Rename the label column header to "Variables"
# (the ** markers are markdown for bold)
select(trial, trt, age, grade, response) %>%
  tbl_summary(by = trt, missing = "no") %>%
  add_n() %>%
  add_p() %>%
  modify_header(label = "**Variables**")
Variables N Drug A, N = 98¹ Drug B, N = 102¹ p-value²
Age 189 46 (37, 59) 48 (39, 56) 0.7
Grade 200 0.9
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)
Tumor Response 193 28 (29%) 33 (34%) 0.5
1 Median (IQR); n (%)
2 Wilcoxon rank sum test; Pearson's Chi-squared test
# Text formatting: bold_labels() sets the variable labels in bold
select(trial, trt, age, grade, response) %>%
  tbl_summary(by = trt, missing = "no") %>%
  add_n() %>%
  add_p() %>%
  modify_header(label = "**Variables**") %>%
  bold_labels()
Variables N Drug A, N = 98¹ Drug B, N = 102¹ p-value²
Age 189 46 (37, 59) 48 (39, 56) 0.7
Grade 200 0.9
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)
Tumor Response 193 28 (29%) 33 (34%) 0.5
1 Median (IQR); n (%)
2 Wilcoxon rank sum test; Pearson's Chi-squared test
# Text formatting: italicize_levels() sets the category levels in italics
select(trial, trt, age, grade, response) %>%
  tbl_summary(by = trt, missing = "no") %>%
  add_n() %>%
  add_p() %>%
  modify_header(label = "**Variables**") %>%
  bold_labels() %>%
  italicize_levels()
Variables N Drug A, N = 98¹ Drug B, N = 102¹ p-value²
Age 189 46 (37, 59) 48 (39, 56) 0.7
Grade 200 0.9
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)
Tumor Response 193 28 (29%) 33 (34%) 0.5
1 Median (IQR); n (%)
2 Wilcoxon rank sum test; Pearson's Chi-squared test
# Table formatting: convert the gtsummary table to a gt table with as_gt()
select(trial, trt, age, grade, response) %>%
  tbl_summary(by = trt, missing = "no") %>%
  add_n() %>%
  add_p() %>%
  modify_header(label = "**Variables**") %>%
  bold_labels() %>%
  italicize_levels() %>%
  as_gt()
Variables N Drug A, N = 98¹ Drug B, N = 102¹ p-value²
Age 189 46 (37, 59) 48 (39, 56) 0.7
Grade 200 0.9
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)
Tumor Response 193 28 (29%) 33 (34%) 0.5
1 Median (IQR); n (%)
2 Wilcoxon rank sum test; Pearson's Chi-squared test
# Table formatting: after as_gt(), add a plain-text title with gt::tab_header()
select(trial, trt, age, grade, response) %>%
  tbl_summary(by = trt, missing = "no") %>%
  add_n() %>%
  add_p() %>%
  modify_header(label = "**Variables**") %>%
  bold_labels() %>%
  italicize_levels() %>%
  as_gt() %>%
  gt::tab_header(
    title = "Table 1. Summary table of 200 patients who received Drug A or Drug B"
  )
Table 1. Summary table of 200 patients who received Drug A or Drug B
Variables N Drug A, N = 98¹ Drug B, N = 102¹ p-value²
Age 189 46 (37, 59) 48 (39, 56) 0.7
Grade 200 0.9
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)
Tumor Response 193 28 (29%) 33 (34%) 0.5
1 Median (IQR); n (%)
2 Wilcoxon rank sum test; Pearson's Chi-squared test
# Table formatting: wrap the title in gt::md() so the markdown ** markers
# give a bold title
select(trial, trt, age, grade, response) %>%
  tbl_summary(by = trt, missing = "no") %>%
  add_n() %>%
  add_p() %>%
  modify_header(label = "**Variables**") %>%
  bold_labels() %>%
  italicize_levels() %>%
  as_gt() %>%
  gt::tab_header(
    title = gt::md(
      "**Table 1. Summary table of 200 patients who received Drug A or Drug B**"
    )
  )
Table 1. Summary table of 200 patients who received Drug A or Drug B
Variables N Drug A, N = 98¹ Drug B, N = 102¹ p-value²
Age 189 46 (37, 59) 48 (39, 56) 0.7
Grade 200 0.9
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)
Tumor Response 193 28 (29%) 33 (34%) 0.5
1 Median (IQR); n (%)
2 Wilcoxon rank sum test; Pearson's Chi-squared test
# Table formatting: wrap the title in gt::md() so the markdown * markers
# give an italic title
select(trial, trt, age, grade, response) %>%
  tbl_summary(by = trt, missing = "no") %>%
  add_n() %>%
  add_p() %>%
  modify_header(label = "**Variables**") %>%
  bold_labels() %>%
  italicize_levels() %>%
  as_gt() %>%
  gt::tab_header(
    title = gt::md(
      "*Table 1. Summary table of 200 patients who received Drug A or Drug B*"
    )
  )
Table 1. Summary table of 200 patients who received Drug A or Drug B
Variables N Drug A, N = 98¹ Drug B, N = 102¹ p-value²
Age 189 46 (37, 59) 48 (39, 56) 0.7
Grade 200 0.9
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)
Tumor Response 193 28 (29%) 33 (34%) 0.5
1 Median (IQR); n (%)
2 Wilcoxon rank sum test; Pearson's Chi-squared test