Test customized gtsummary functions

CSR
Tables
Figures
Author

Eric Chen

Published

January 24, 2025

Code
# NOTE: the former `rm(list = ls(all = TRUE))` call was removed on purpose.
# Clearing the global environment does not give a clean session (attached
# packages, options and the working directory are left untouched) and it
# silently destroys the workspace of anyone running this chunk interactively.
# Restart R (or re-render the document) when a clean session is required.

# Packages are attached in the original order so that function masking
# between them is unchanged.
library(tidyverse)
library(officer)
library(lubridate)

library(rio)
library(gtsummary)
library(flextable)
library(dplyr)
library(haven)
library(gt)
library(survival)

CDISCPILOT01

Brief Description of Case Study

This file contains the results of calls to several functions that were built for general analysis purposes. It cannot be rendered to PDF because some of the code is not compatible with the LaTeX environment.


Tables

Tables 14-1

Table 14-1.01

Set `#| echo: false` if you don’t want the code to be displayed in the Word or HTML output.

Code
# Load the project helper scripts. create_summary_table() and
# regression_table() are expected to be defined by these files (the table
# footnotes name them as the source of the output).
#
# The directory defaults to the original author's location, so behaviour is
# unchanged when nothing is configured. Set the R_FUNCTIONS_DIR environment
# variable to point at the helper scripts on another machine.
default_functions_dir <- "C:/Users/zunqi/OneDrive/R related/R and AI/functions"
functions_dir <- Sys.getenv("R_FUNCTIONS_DIR", unset = default_functions_dir)
if (!nzchar(functions_dir)) {
  # An empty value would resolve to the filesystem root; fall back instead.
  functions_dir <- default_functions_dir
}

# source(file.path(functions_dir, "continuous and categorical table.r"))
source(file.path(functions_dir, "continuous and categorical table with Pvalue.r"))
source(file.path(functions_dir, "simple regression with table.r"))

# IMPORT DATA
# ADSL is read directly from the CDISC pilot repository (network required).
adsl_orig <- haven::read_xpt(
  "https://raw.githubusercontent.com/cdisc-org/sdtm-adam-pilot-project/master/updated-pilot-submission-package/900172/m5/datasets/cdiscpilot01/analysis/adam/datasets/adsl.xpt"
)

# Two-arm subset (Placebo vs. Xanomeline High Dose), used for the
# difference-based summary later in this report.
# NOTE(review): the subset filters on ARM while the tables group by TRT01A --
# confirm the two variables agree for every subject.
adsl_orig.sub <- adsl_orig %>%
  filter(ARM %in% c("Placebo", "Xanomeline High Dose"))
# Demographics summary on the complete ADSL data, one column per TRT01A level
# plus an overall column; p-values and confidence intervals are requested.
# NOTE(review): continuous_digits holds 8 values while only 4 continuous
# variables are listed -- confirm how create_summary_table() consumes it.
create_summary_table(
  data              = adsl_orig,
  by_var            = TRT01A,
  continuous_vars   = c("AGE", "BMIBL", "WEIGHTBL", "HEIGHTBL"),
  categorical_vars  = c("AGEGR1", "SEX", "RACE", "ETHNIC"),
  var_order         = c(
    "AGE", "AGEGR1", "BMIBL", "WEIGHTBL",
    "HEIGHTBL", "SEX", "RACE", "ETHNIC"
  ),
  continuous_digits = c(0, 1, 2, 1, 1, 1, 2, 2),
  continuous_labels = list(
    AGE      = "Age",
    BMIBL    = "BMI",
    WEIGHTBL = "WEIGHT AT BL",
    HEIGHTBL = "HEIGHT AT BL"
  ),
  categorical_labels = list(AGEGR1 = "AGE GROUP"),
  include_overall   = TRUE,
  add_p             = TRUE,
  add_CI            = TRUE,
  table_title       = "Table 14-1.01: Summary of Demographics",
  population        = "Full dataset",
  output_type       = c("tbl_summary")
  # Alternative outputs:
  # output_type = c("flextable")
  # output_type = "tibble"
)
Table 14-1.01: Summary of Demographics
Full dataset
Characteristic Total
(N=254)1
95% CI2 Placebo
N = 861
95% CI2 Xanomeline High Dose
N = 841
95% CI2 Xanomeline Low Dose
N = 841
95% CI2 P Value3
Age
74, 76
73, 77
73, 76
74, 77 0.442
    n 254
86
84
84

    Mean (SD) 75.1 (8.25)
75.2 (8.59)
74.4 (7.89)
75.7 (8.29)

    Median (P25, P75) 77.0 (70.0, 81.0)
76.0 (69.0, 82.0)
76.0 (70.5, 80.0)
77.5 (71.0, 82.0)

    Min, Max 51.00, 89.00
52.00, 89.00
56.00, 88.00
51.00, 88.00

AGE GROUP, n (%)







0.144
    <65 33 (13%) 9.2%, 18% 14 (16%) 9.5%, 26% 11 (13%) 7.0%, 23% 8 (9.5%) 4.5%, 18%
    >80 77 (30%) 25%, 36% 30 (35%) 25%, 46% 18 (21%) 14%, 32% 29 (35%) 25%, 46%
    65-80 144 (57%) 50%, 63% 42 (49%) 38%, 60% 55 (65%) 54%, 75% 47 (56%) 45%, 67%
BMI
24, 25
23, 24
24, 26
24, 26 0.010
    n 253
86
84
83

    Mean (SD) 24.7 (4.09)
23.6 (3.67)
25.3 (4.16)
25.1 (4.27)

    Median (P25, P75) 24.2 (21.9, 27.3)
23.4 (21.2, 25.6)
24.8 (22.7, 27.9)
24.3 (22.1, 27.8)

    Min, Max 13.70, 40.10
15.10, 33.30
13.70, 34.50
17.70, 40.10

    Missing 1
0
0
1

WEIGHT AT BL
65, 68
60, 65
67, 73
64, 70 0.011
    n 253
86
84
83

    Mean (SD) 66.6 (14.13)
62.8 (12.77)
70.0 (14.65)
67.3 (14.12)

    Median (P25, P75) 66.7 (55.3, 77.1)
60.6 (53.5, 74.4)
69.2 (56.8, 80.3)
64.9 (55.8, 77.8)

    Min, Max 34.00, 108.00
34.00, 86.20
41.70, 108.00
45.40, 106.10

    Missing 1
0
0
1

HEIGHT AT BL
163, 165
160, 165
164, 168
161, 166 0.134
    n 254
86
84
84

    Mean (SD) 163.9 (10.76)
162.6 (11.52)
165.8 (10.13)
163.4 (10.42)

    Median (P25, P75) 162.9 (156.2, 171.5)
162.6 (153.7, 171.5)
165.1 (157.5, 172.9)
162.6 (157.5, 170.2)

    Min, Max 135.90, 195.60
137.20, 185.40
146.10, 190.50
135.90, 195.60

Sex, n (%)







0.141
    F 143 (56%) 50%, 62% 53 (62%) 50%, 72% 40 (48%) 37%, 59% 50 (60%) 48%, 70%
    M 111 (44%) 38%, 50% 33 (38%) 28%, 50% 44 (52%) 41%, 63% 34 (40%) 30%, 52%
Race, n (%)







0.680
    AMERICAN INDIAN OR ALASKA NATIVE 1 (0.4%) 0.02%, 2.5% 0 (0%) 0.00%, 5.3% 1 (1.2%) 0.06%, 7.4% 0 (0%) 0.00%, 5.4%
    BLACK OR AFRICAN AMERICAN 23 (9.1%) 5.9%, 13% 8 (9.3%) 4.4%, 18% 9 (11%) 5.3%, 20% 6 (7.1%) 2.9%, 15%
    WHITE 230 (91%) 86%, 94% 78 (91%) 82%, 96% 74 (88%) 79%, 94% 78 (93%) 85%, 97%
Ethnicity, n (%)







0.570
    HISPANIC OR LATINO 12 (4.7%) 2.6%, 8.3% 3 (3.5%) 0.91%, 11% 3 (3.6%) 0.93%, 11% 6 (7.1%) 2.9%, 15%
    NOT HISPANIC OR LATINO 242 (95%) 92%, 97% 83 (97%) 89%, 99% 81 (96%) 89%, 99% 78 (93%) 85%, 97%
1 source: produced through continuous and categorical table with Pvalue.r
2 CI = Confidence Interval
3 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test; Fisher’s exact test
Code
# Same demographics summary, restricted to the two-arm subset so that a
# between-group difference column can be added.
# NOTE(review): the input is adsl_orig.sub (a subset), yet the table number,
# title and population label ("Full dataset") are the same as for the
# full-data table -- confirm whether they should describe the subset instead.
# NOTE(review): continuous_digits holds 8 values while only 4 continuous
# variables are listed -- confirm how create_summary_table() consumes it.
create_summary_table(
  data              = adsl_orig.sub,
  by_var            = TRT01A,
  continuous_vars   = c("AGE", "BMIBL", "WEIGHTBL", "HEIGHTBL"),
  categorical_vars  = c("AGEGR1", "SEX", "RACE", "ETHNIC"),
  var_order         = c(
    "AGE", "AGEGR1", "BMIBL", "WEIGHTBL",
    "HEIGHTBL", "SEX", "RACE", "ETHNIC"
  ),
  continuous_digits = c(0, 1, 2, 1, 1, 1, 2, 2),
  continuous_labels = list(
    AGE      = "Age",
    BMIBL    = "BMI",
    WEIGHTBL = "WEIGHT AT BL",
    HEIGHTBL = "HEIGHT AT BL"
  ),
  categorical_labels = list(AGEGR1 = "AGE GROUP"),
  include_overall   = TRUE,
  add_p             = TRUE,
  add_CI            = TRUE,
  # Per the original author: the difference only works when the grouping
  # variable has two levels.
  add_difference    = TRUE,
  table_title       = "Table 14-1.01: Summary of Demographics",
  population        = "Full dataset",
  output_type       = c("tbl_summary")
  # Alternative outputs:
  # output_type = c("flextable")
  # output_type = "tibble"
)
Table 14-1.01: Summary of Demographics
Full dataset
Characteristic Total
(N=170)1
95% CI2 Placebo
N = 861
95% CI2 Xanomeline High Dose
N = 841
95% CI2 Difference3 95% CI3,2 P Value4
Age
74, 76
73, 77
73, 76 0.83 -1.7, 3.3 0.435
    n 170
86
84



    Mean (SD) 74.8 (8.24)
75.2 (8.59)
74.4 (7.89)



    Median (P25, P75) 76.0 (70.0, 81.0)
76.0 (69.0, 82.0)
76.0 (70.5, 80.0)



    Min, Max 52.00, 89.00
52.00, 89.00
56.00, 88.00



AGE GROUP, n (%)





0.35 0.05, 0.65 0.079
    <65 25 (15%) 9.9%, 21% 14 (16%) 9.5%, 26% 11 (13%) 7.0%, 23%


    >80 48 (28%) 22%, 36% 30 (35%) 25%, 46% 18 (21%) 14%, 32%


    65-80 97 (57%) 49%, 65% 42 (49%) 38%, 60% 55 (65%) 54%, 75%


BMI
24, 25
23, 24
24, 26 -1.7 -2.9, -0.52 0.003
    n 170
86
84



    Mean (SD) 24.5 (4.00)
23.6 (3.67)
25.3 (4.16)



    Median (P25, P75) 24.2 (21.8, 27.1)
23.4 (21.2, 25.6)
24.8 (22.7, 27.9)



    Min, Max 13.70, 34.50
15.10, 33.30
13.70, 34.50



WEIGHT AT BL
64, 68
60, 65
67, 73 -7.2 -11, -3.1 0.003
    n 170
86
84



    Mean (SD) 66.3 (14.17)
62.8 (12.77)
70.0 (14.65)



    Median (P25, P75) 66.7 (54.9, 76.7)
60.6 (53.5, 74.4)
69.2 (56.8, 80.3)



    Min, Max 34.00, 108.00
34.00, 86.20
41.70, 108.00



HEIGHT AT BL
163, 166
160, 165
164, 168 -3.2 -6.5, 0.04 0.071
    n 170
86
84



    Mean (SD) 164.2 (10.95)
162.6 (11.52)
165.8 (10.13)



    Median (P25, P75) 165.1 (154.9, 172.7)
162.6 (153.7, 171.5)
165.1 (157.5, 172.9)



    Min, Max 137.20, 190.50
137.20, 185.40
146.10, 190.50



Sex, n (%)





0.28 -0.02, 0.59 0.067
    F 93 (55%) 47%, 62% 53 (62%) 50%, 72% 40 (48%) 37%, 59%


    M 77 (45%) 38%, 53% 33 (38%) 28%, 50% 44 (52%) 41%, 63%


Race, n (%)





0.16 -0.14, 0.46 0.705
    AMERICAN INDIAN OR ALASKA NATIVE 1 (0.6%) 0.03%, 3.7% 0 (0%) 0.00%, 5.3% 1 (1.2%) 0.06%, 7.4%


    BLACK OR AFRICAN AMERICAN 17 (10%) 6.1%, 16% 8 (9.3%) 4.4%, 18% 9 (11%) 5.3%, 20%


    WHITE 152 (89%) 84%, 93% 78 (91%) 82%, 96% 74 (88%) 79%, 94%


Ethnicity, n (%)





0.00 -0.30, 0.31 >0.999
    HISPANIC OR LATINO 6 (3.5%) 1.4%, 7.9% 3 (3.5%) 0.91%, 11% 3 (3.6%) 0.93%, 11%


    NOT HISPANIC OR LATINO 164 (96%) 92%, 99% 83 (97%) 89%, 99% 81 (96%) 89%, 99%


1 source: produced through continuous and categorical table with Pvalue.r
2 CI = Confidence Interval
3 Welch Two Sample t-test; Standardized Mean Difference
4 Wilcoxon rank sum test; Pearson’s Chi-squared test; Fisher’s exact test

Table 14-3.02

Code
# The helper script with create_summary_table() is not re-sourced here:
# source("C:/Users/zunqi/OneDrive/R related/R and AI/functions/continuous and categorical table.r")

# IMPORT DATA
# ADQSCIBC is read directly from the CDISC pilot repository (network required).
adqscibc_orig <- haven::read_xpt(
  "https://raw.githubusercontent.com/cdisc-org/sdtm-adam-pilot-project/master/updated-pilot-submission-package/900172/m5/datasets/cdiscpilot01/analysis/adam/datasets/adqscibc.xpt"
)

# SUBSET
# Keep flagged CIBICVAL records at AVISITN 24 with a non-missing AVAL, then
# derive the analysis variables:
#   * TRTP      -> factor with Placebo first and shortened dose labels
#   * CIBIC_cat -> "Low" when AVAL < 4, otherwise "High"; as.factor() sorts
#                  the levels alphabetically ("High", then "Low")
adqscibc <- adqscibc_orig %>%
  filter(
    EFFFL == "Y",
    ITTFL == "Y",
    PARAMCD == "CIBICVAL",
    ANL01FL == "Y",
    AVISITN == 24,
    !is.na(AVAL)
  ) %>%
  mutate(
    TRTP = factor(
      TRTP,
      levels = c("Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"),
      labels = c("Placebo", "Low Dose", "High Dose")
    ),
    CIBIC_cat = as.factor(ifelse(AVAL < 4, "Low", "High"))
  )

Set M as the reference level for gender.

Code
# Convert SEX to a factor and move "M" to the first (reference) level.
adqscibc <- adqscibc %>%
  mutate(SEX = relevel(as.factor(SEX), ref = "M"))
Code
# Multivariate logistic regression of CIBIC_cat on treatment, age and sex,
# with N and event counts added to the table.
mulm_table <- regression_table(
  response        = CIBIC_cat,
  covariates      = c("TRTP", "AGE", "SEX"),
  data            = adqscibc,
  regression_type = logistic,
  type            = multivariate,
  add_n           = TRUE,
  add_nevent      = TRUE,
  table_title     = "Table 14-3.02: Primary endpoint: multivariate logistic regression",
  population      = "Full dataset",
  output_type     = c("tbl_summary")
  # Alternative output:
  # output_type = c("flextable")
)

mulm_table
Table 14-3.02: Primary endpoint: multivariate logistic regression
Full dataset
Characteristic N Event N OR1,2 95% CI2 p-value
TRTP 234



    Placebo 79 10
    Low Dose 81 15 1.63 0.69, 4.03 0.3
    High Dose 74 11 1.12 0.44, 2.89 0.8
Age 234 36 0.97 0.93, 1.02 0.2
SEX 234



    M 106 20
    F 128 16 0.63 0.30, 1.29 0.2
1 Source: produced through simple regression with table.r
2 OR = Odds Ratio, CI = Confidence Interval
Code
# Univariate logistic regressions of CIBIC_cat, requested through
# type = univariate for the same three covariates.
uvlm_table <- regression_table(
  response        = CIBIC_cat,
  covariates      = c("TRTP", "AGE", "SEX"),
  data            = adqscibc,
  regression_type = logistic,
  type            = univariate,
  add_n           = TRUE,
  add_nevent      = TRUE,
  table_title     = "Table 14-3.02: Primary endpoint: univariate logistic regression",
  population      = "Full dataset",
  output_type     = c("tbl_summary")
  # Alternative output:
  # output_type = c("flextable")
)

uvlm_table
Table 14-3.02: Primary endpoint: univariate logistic regression
Full dataset
Characteristic N Event N OR1,2 95% CI2 p-value
TRTP 234



    Placebo 79 10
    Low Dose 81 15 1.57 0.66, 3.84 0.3
    High Dose 74 11 1.20 0.48, 3.08 0.7
Age 234 36 0.97 0.93, 1.02 0.2
SEX 234



    M 106 20
    F 128 16 0.61 0.30, 1.25 0.2
1 Source: produced through simple regression with table.r
2 OR = Odds Ratio, CI = Confidence Interval
Code
# Place the multivariate and univariate logistic tables side by side under
# their own spanning headers, then set the caption of the merged table.
merge_table <- tbl_merge(
  tbls        = list(mulm_table, uvlm_table),
  tab_spanner = c("multivariate result", "univariate result")
) %>%
  modify_caption(
    "**Table 14-3.02:Multivariate and Univariate Table logistic regression**"
  )

merge_table
Table 14-3.02:Multivariate and Univariate Table logistic regression
Characteristic
multivariate result
univariate result
N Event N OR1,2 95% CI2 p-value N Event N OR1,2 95% CI2 p-value
TRTP 234



234



    Placebo 79 10
79 10
    Low Dose 81 15 1.63 0.69, 4.03 0.3 81 15 1.57 0.66, 3.84 0.3
    High Dose 74 11 1.12 0.44, 2.89 0.8 74 11 1.20 0.48, 3.08 0.7
Age 234 36 0.97 0.93, 1.02 0.2 234 36 0.97 0.93, 1.02 0.2
SEX 234



234



    M 106 20
106 20
    F 128 16 0.63 0.30, 1.29 0.2 128 16 0.61 0.30, 1.25 0.2
1 Source: produced through simple regression with table.r
2 OR = Odds Ratio, CI = Confidence Interval

Table 14-3.03

Code
# Multivariate linear regression of AVAL on treatment, age and sex.
mulm_table <- regression_table(
  response        = AVAL,
  covariates      = c("TRTP", "AGE", "SEX"),
  data            = adqscibc,
  regression_type = linear,
  type            = multivariate,
  add_n           = TRUE,
  table_title     = "Table 14-3.03: Primary endpoint: multivariate linear regression",
  population      = "Full dataset",
  output_type     = c("tbl_summary")
  # Alternative output:
  # output_type = c("flextable")
)

mulm_table
Table 14-3.03: Primary endpoint: multivariate linear regression
Full dataset
Characteristic N Beta1 95% CI2 p-value
TRTP 234


    Placebo 79
    Low Dose 81 -0.12 -0.36, 0.13 0.3
    High Dose 74 0.05 -0.20, 0.30 0.7
Age 234 0.01 0.00, 0.02 0.083
SEX 234


    M 106
    F 128 0.06 -0.14, 0.27 0.5
1 Source: produced through simple regression with table.r
2 CI = Confidence Interval
Code
# Univariate linear regressions of AVAL, requested through type = univariate
# for the same three covariates.
uvlm_table <- regression_table(
  response        = AVAL,
  covariates      = c("TRTP", "AGE", "SEX"),
  data            = adqscibc,
  regression_type = linear,
  type            = univariate,
  add_n           = TRUE,
  table_title     = "Table 14-3.03: Primary endpoint: univariate linear regression",
  population      = "Full dataset",
  output_type     = c("tbl_summary")
  # Alternative output:
  # output_type = c("flextable")
)

uvlm_table
Table 14-3.03: Primary endpoint: univariate linear regression
Full dataset
Characteristic N Beta1 95% CI2 p-value
TRTP 234


    Placebo 79
    Low Dose 81 -0.11 -0.35, 0.14 0.4
    High Dose 74 0.03 -0.22, 0.28 0.8
Age 234 0.01 0.00, 0.02 0.10
SEX 234


    M 106
    F 128 0.07 -0.13, 0.27 0.5
1 Source: produced through simple regression with table.r
2 CI = Confidence Interval
Code
# Place the multivariate and univariate linear tables side by side under
# their own spanning headers, then set the caption of the merged table.
merge_table <- tbl_merge(
  tbls        = list(mulm_table, uvlm_table),
  tab_spanner = c("multivariate result", "univariate result")
) %>%
  modify_caption(
    "**Table 14-3.03:Multivariate and Univariate Table linear regression**"
  )

merge_table
Table 14-3.03:Multivariate and Univariate Table linear regression
Characteristic
multivariate result
univariate result
N Beta1 95% CI2 p-value N Beta1 95% CI2 p-value
TRTP 234


234


    Placebo 79
79
    Low Dose 81 -0.12 -0.36, 0.13 0.3 81 -0.11 -0.35, 0.14 0.4
    High Dose 74 0.05 -0.20, 0.30 0.7 74 0.03 -0.22, 0.28 0.8
Age 234 0.01 0.00, 0.02 0.083 234 0.01 0.00, 0.02 0.10
SEX 234


234


    M 106
106
    F 128 0.06 -0.14, 0.27 0.5 128 0.07 -0.13, 0.27 0.5
1 Source: produced through simple regression with table.r
2 CI = Confidence Interval

Table 14-1.41

Code
# Summary of the gtsummary `trial` example data by treatment arm.
# Only the four columns used by the table are kept.
d <- trial %>%
  dplyr::select(trt, age, grade, response)

# NOTE(review): continuous_digits holds 8 values although a single continuous
# variable is listed -- confirm how create_summary_table() consumes it.
create_summary_table(
  data              = d,
  by_var            = trt,
  continuous_vars   = c("age"),
  categorical_vars  = c("grade", "response"),
  var_order         = c("age", "grade", "response"),
  continuous_digits = c(0, 1, 2, 1, 1, 1, 2, 2),
  # Fix: the label was keyed as `AGE`, which matches no variable passed to
  # this call (R names are case-sensitive and the column is `age`). The key
  # now matches the variable it labels.
  continuous_labels = list(
    age = "Age"
  ),
  categorical_labels = list(
    grade    = "Grade",
    response = "Tumor Response"
  ),
  include_overall   = TRUE,
  add_p             = TRUE,
  add_CI            = TRUE,
  add_difference    = TRUE,
  table_title       = "Table 14-1.41: Summary of cancer information",
  population        = "Full dataset",
  output_type       = c("tbl_summary")
  # Alternative outputs:
  # output_type = c("flextable")
  # output_type = "tibble"
)
Table 14-1.41: Summary of cancer information
Full dataset
Characteristic Total
(N=200)1
95% CI2 Drug A
N = 981
95% CI2 Drug B
N = 1021
95% CI2 Difference3 95% CI3,2 P Value4
Age
45, 49
44, 50
45, 50 -0.44 -4.6, 3.7 0.718
    n 189
91
98



    Mean (SD) 47.2 (14.31)
47.0 (14.71)
47.4 (14.01)



    Median (P25, P75) 47.0 (38.0, 57.0)
46.0 (37.0, 60.0)
48.0 (39.0, 56.0)



    Min, Max 6.00, 83.00
6.00, 78.00
9.00, 83.00



    Missing 11
7
4



Grade, n (%)





0.07 -0.20, 0.35 0.871
    I 68 (34%) 28%, 41% 35 (36%) 26%, 46% 33 (32%) 24%, 42%


    II 68 (34%) 28%, 41% 32 (33%) 24%, 43% 36 (35%) 26%, 45%


    III 64 (32%) 26%, 39% 31 (32%) 23%, 42% 33 (32%) 24%, 42%


Tumor Response, n (%)





-0.09 -0.37, 0.19 0.530
    0 132 (68%) 61%, 75% 67 (71%) 60%, 79% 65 (66%) 56%, 75%


    1 61 (32%) 25%, 39% 28 (29%) 21%, 40% 33 (34%) 25%, 44%


    Missing 7
3
4



1 source: produced through continuous and categorical table with Pvalue.r
2 CI = Confidence Interval
3 Welch Two Sample t-test; Standardized Mean Difference
4 Wilcoxon rank sum test; Pearson’s Chi-squared test

Table 14-1.42

Code
 source("C:/Users/zunqi/OneDrive/R related/R and AI/functions/simple regression with table.r")
# For survival analysis the response is entered differently from the other
# regression types: it is a pair of column names.
# "ttdeath" is the time to death, which is required; "death" is passed as the
# second element.

mulm_table <- regression_table(
  response        = c("ttdeath", "death"),
  covariates      = c("trt", "age", "grade"),
  data            = trial,
  regression_type = survival,
  type            = multivariate,
  add_n           = TRUE,
  table_title     = "Table 14-1.42: Survival Analysis, multivariate cox HR regression",
  population      = "Full dataset",
  output_type     = c("tbl_summary")
  # Alternative output:
  # output_type = c("flextable")
)

mulm_table
Table 14-1.42: Survival Analysis, multivariate cox HR regression
Full dataset
Characteristic N HR1,2 95% CI2 p-value
Chemotherapy Treatment 189


    Drug A 91
    Drug B 98 1.30 0.88, 1.92 0.2
Age 189 1.01 0.99, 1.02 0.3
Grade 189


    I 66
    II 62 1.21 0.73, 1.99 0.5
    III 61 1.79 1.12, 2.86 0.014
1 Source: produced through simple regression with table.r
2 HR = Hazard Ratio, CI = Confidence Interval
Code
# Single-covariate (treatment only) survival model. Per the original author,
# the `type` argument does not matter for a univariate survival analysis.
regression_table(
  response        = c("ttdeath", "death"),
  covariates      = c("trt"),
  data            = trial,
  regression_type = survival,
  type            = multivariate,
  add_n           = TRUE,
  table_title     = "Table 14-1.42.1: Survival Analysis, univariate cox HR regression",
  population      = "Full dataset",
  output_type     = c("tbl_summary")
  # Alternative output:
  # output_type = c("flextable")
)
Table 14-1.42.1: Survival Analysis, univariate cox HR regression
Full dataset
Characteristic N HR1,2 95% CI2 p-value
Chemotherapy Treatment 200


    Drug A 98
    Drug B 102 1.25 0.86, 1.81 0.2
1 Source: produced through simple regression with table.r
2 HR = Hazard Ratio, CI = Confidence Interval