Code

# NOTE(review): this clears every object from the current workspace before the
# report runs. It does not detach packages or reset options, so it is not
# equivalent to starting a fresh R session; consider rendering in a fresh
# session and dropping this line.
rm(list=ls(all=TRUE))
library(tidyverse)
library(officer)
library(lubridate)

library(rio)
library(gtsummary)
library(flextable)
library(dplyr)
library(haven)
library(gt)
library(survival)

CDISCPILOT01

Brief Description of Case Study

This file contains results from several function calls that were built for general analysis purposes. It cannot be rendered to PDF because some of the code is not compatible with the LaTeX environment.

Tables

Tables 14-1

Table 14-1.01

Set `#| echo: false` if you don’t want the code to be displayed in the Word or HTML output.

Code

# Load the helper scripts used by this report. Per the original note, the first
# script provides create_summary_table(); regression_table() is presumably
# defined by the second one -- TODO confirm.
# Earlier helper script, kept commented out for reference:
# source("C:/Users/zunqi/OneDrive/R related/R and AI/functions/continuous and categorical table.r")
source(
  "C:/Users/zunqi/OneDrive/R related/R and AI/functions/continuous and categorical table with Pvalue.r"
)
source(
  "C:/Users/zunqi/OneDrive/R related/R and AI/functions/simple regression with table.r"
)

# IMPORT DATA
# Read the ADSL transport (.xpt) file straight from the CDISC pilot repository.
adsl_orig <- haven::read_xpt(
  "https://raw.githubusercontent.com/cdisc-org/sdtm-adam-pilot-project/master/updated-pilot-submission-package/900172/m5/datasets/cdiscpilot01/analysis/adam/datasets/adsl.xpt"
)

# Two-arm subset: keep only rows whose ARM is Placebo or Xanomeline High Dose.
# NOTE(review): the subset is taken on ARM while the tables below are split by
# TRT01A -- confirm the two columns agree for every subject.
adsl_orig.sub <- dplyr::filter(
  adsl_orig,
  ARM %in% c("Placebo", "Xanomeline High Dose")
)
# Call the create_summary_table function

# Demographics summary of the full ADSL data, split by TRT01A, with an overall
# column plus the 95% CI and P Value columns shown in the rendered table.
# NOTE(review): continuous_digits supplies 8 values although only 4 continuous
# variables are listed; how create_summary_table() maps them is not visible
# here -- confirm.
create_summary_table(
  data = adsl_orig,
  by_var = TRT01A,
  continuous_vars = c("AGE", "BMIBL", "WEIGHTBL", "HEIGHTBL"),
  categorical_vars = c("AGEGR1", "SEX", "RACE", "ETHNIC"),
  var_order = c(
    "AGE", "AGEGR1", "BMIBL", "WEIGHTBL",
    "HEIGHTBL", "SEX", "RACE", "ETHNIC"
  ),
  continuous_digits = c(0, 1, 2, 1, 1, 1, 2, 2),
  continuous_labels = list(
    AGE = "Age",
    BMIBL = "BMI",
    WEIGHTBL = "WEIGHT AT BL",
    HEIGHTBL = "HEIGHT AT BL"
  ),
  categorical_labels = list(AGEGR1 = "AGE GROUP"),
  include_overall = TRUE,
  add_p = TRUE,
  add_CI = TRUE,
  table_title = "Table 14-1.01: Summary of Demographics",
  population = "Full dataset",
  # Alternative output formats: c("flextable") or "tibble".
  output_type = c("tbl_summary")
)

**Table 14-1.01: Summary of Demographics**
**Full dataset**
Characteristic	Total (N=254)¹	95% CI²	Placebo N = 86¹	95% CI²	Xanomeline High Dose N = 84¹	95% CI²	Xanomeline Low Dose N = 84¹	95% CI²	P Value³
Age		74, 76		73, 77		73, 76		74, 77	0.442
n	254		86		84		84
Mean (SD)	75.1 (8.25)		75.2 (8.59)		74.4 (7.89)		75.7 (8.29)
Median (P25, P75)	77.0 (70.0, 81.0)		76.0 (69.0, 82.0)		76.0 (70.5, 80.0)		77.5 (71.0, 82.0)
Min, Max	51.00, 89.00		52.00, 89.00		56.00, 88.00		51.00, 88.00
AGE GROUP, n (%)									0.144
<65	33 (13%)	9.2%, 18%	14 (16%)	9.5%, 26%	11 (13%)	7.0%, 23%	8 (9.5%)	4.5%, 18%
>80	77 (30%)	25%, 36%	30 (35%)	25%, 46%	18 (21%)	14%, 32%	29 (35%)	25%, 46%
65-80	144 (57%)	50%, 63%	42 (49%)	38%, 60%	55 (65%)	54%, 75%	47 (56%)	45%, 67%
BMI		24, 25		23, 24		24, 26		24, 26	0.010
n	253		86		84		83
Mean (SD)	24.7 (4.09)		23.6 (3.67)		25.3 (4.16)		25.1 (4.27)
Median (P25, P75)	24.2 (21.9, 27.3)		23.4 (21.2, 25.6)		24.8 (22.7, 27.9)		24.3 (22.1, 27.8)
Min, Max	13.70, 40.10		15.10, 33.30		13.70, 34.50		17.70, 40.10
Missing	1		0		0		1
WEIGHT AT BL		65, 68		60, 65		67, 73		64, 70	0.011
n	253		86		84		83
Mean (SD)	66.6 (14.13)		62.8 (12.77)		70.0 (14.65)		67.3 (14.12)
Median (P25, P75)	66.7 (55.3, 77.1)		60.6 (53.5, 74.4)		69.2 (56.8, 80.3)		64.9 (55.8, 77.8)
Min, Max	34.00, 108.00		34.00, 86.20		41.70, 108.00		45.40, 106.10
Missing	1		0		0		1
HEIGHT AT BL		163, 165		160, 165		164, 168		161, 166	0.134
n	254		86		84		84
Mean (SD)	163.9 (10.76)		162.6 (11.52)		165.8 (10.13)		163.4 (10.42)
Median (P25, P75)	162.9 (156.2, 171.5)		162.6 (153.7, 171.5)		165.1 (157.5, 172.9)		162.6 (157.5, 170.2)
Min, Max	135.90, 195.60		137.20, 185.40		146.10, 190.50		135.90, 195.60
Sex, n (%)									0.141
F	143 (56%)	50%, 62%	53 (62%)	50%, 72%	40 (48%)	37%, 59%	50 (60%)	48%, 70%
M	111 (44%)	38%, 50%	33 (38%)	28%, 50%	44 (52%)	41%, 63%	34 (40%)	30%, 52%
Race, n (%)									0.680
AMERICAN INDIAN OR ALASKA NATIVE	1 (0.4%)	0.02%, 2.5%	0 (0%)	0.00%, 5.3%	1 (1.2%)	0.06%, 7.4%	0 (0%)	0.00%, 5.4%
BLACK OR AFRICAN AMERICAN	23 (9.1%)	5.9%, 13%	8 (9.3%)	4.4%, 18%	9 (11%)	5.3%, 20%	6 (7.1%)	2.9%, 15%
WHITE	230 (91%)	86%, 94%	78 (91%)	82%, 96%	74 (88%)	79%, 94%	78 (93%)	85%, 97%
Ethnicity, n (%)									0.570
HISPANIC OR LATINO	12 (4.7%)	2.6%, 8.3%	3 (3.5%)	0.91%, 11%	3 (3.6%)	0.93%, 11%	6 (7.1%)	2.9%, 15%
NOT HISPANIC OR LATINO	242 (95%)	92%, 97%	83 (97%)	89%, 99%	81 (96%)	89%, 99%	78 (93%)	85%, 97%
¹ source: produced through continuous and categorical table with Pvalue.r
² CI = Confidence Interval
³ Kruskal-Wallis rank sum test; Pearson’s Chi-squared test; Fisher’s exact test

Code

# Same demographics summary, restricted to the two-arm subset (adsl_orig.sub)
# so that a between-group difference column can be added.
# NOTE(review): the title reuses table number 14-1.01 and the population is
# labelled "Full dataset" although the input is a filtered subset -- confirm
# whether both labels are intended.
# NOTE(review): continuous_digits supplies 8 values for 4 continuous
# variables; the mapping inside create_summary_table() is not visible here.
create_summary_table(
  data = adsl_orig.sub,
  by_var = TRT01A,
  continuous_vars = c("AGE", "BMIBL", "WEIGHTBL", "HEIGHTBL"),
  categorical_vars = c("AGEGR1", "SEX", "RACE", "ETHNIC"),
  var_order = c(
    "AGE", "AGEGR1", "BMIBL", "WEIGHTBL",
    "HEIGHTBL", "SEX", "RACE", "ETHNIC"
  ),
  continuous_digits = c(0, 1, 2, 1, 1, 1, 2, 2),
  continuous_labels = list(
    AGE = "Age",
    BMIBL = "BMI",
    WEIGHTBL = "WEIGHT AT BL",
    HEIGHTBL = "HEIGHT AT BL"
  ),
  categorical_labels = list(AGEGR1 = "AGE GROUP"),
  include_overall = TRUE,
  add_p = TRUE,
  add_CI = TRUE,
  # The difference column only works when by_var has exactly two levels.
  add_difference = TRUE,
  table_title = "Table 14-1.01: Summary of Demographics",
  population = "Full dataset",
  # Alternative output formats: c("flextable") or "tibble".
  output_type = c("tbl_summary")
)

**Table 14-1.01: Summary of Demographics**
**Full dataset**
Characteristic	Total (N=170)¹	95% CI²	Placebo N = 86¹	95% CI²	Xanomeline High Dose N = 84¹	95% CI²	Difference³	95% CI^3,2	P Value⁴
Age		74, 76		73, 77		73, 76	0.83	-1.7, 3.3	0.435
n	170		86		84
Mean (SD)	74.8 (8.24)		75.2 (8.59)		74.4 (7.89)
Median (P25, P75)	76.0 (70.0, 81.0)		76.0 (69.0, 82.0)		76.0 (70.5, 80.0)
Min, Max	52.00, 89.00		52.00, 89.00		56.00, 88.00
AGE GROUP, n (%)							0.35	0.05, 0.65	0.079
<65	25 (15%)	9.9%, 21%	14 (16%)	9.5%, 26%	11 (13%)	7.0%, 23%
>80	48 (28%)	22%, 36%	30 (35%)	25%, 46%	18 (21%)	14%, 32%
65-80	97 (57%)	49%, 65%	42 (49%)	38%, 60%	55 (65%)	54%, 75%
BMI		24, 25		23, 24		24, 26	-1.7	-2.9, -0.52	0.003
n	170		86		84
Mean (SD)	24.5 (4.00)		23.6 (3.67)		25.3 (4.16)
Median (P25, P75)	24.2 (21.8, 27.1)		23.4 (21.2, 25.6)		24.8 (22.7, 27.9)
Min, Max	13.70, 34.50		15.10, 33.30		13.70, 34.50
WEIGHT AT BL		64, 68		60, 65		67, 73	-7.2	-11, -3.1	0.003
n	170		86		84
Mean (SD)	66.3 (14.17)		62.8 (12.77)		70.0 (14.65)
Median (P25, P75)	66.7 (54.9, 76.7)		60.6 (53.5, 74.4)		69.2 (56.8, 80.3)
Min, Max	34.00, 108.00		34.00, 86.20		41.70, 108.00
HEIGHT AT BL		163, 166		160, 165		164, 168	-3.2	-6.5, 0.04	0.071
n	170		86		84
Mean (SD)	164.2 (10.95)		162.6 (11.52)		165.8 (10.13)
Median (P25, P75)	165.1 (154.9, 172.7)		162.6 (153.7, 171.5)		165.1 (157.5, 172.9)
Min, Max	137.20, 190.50		137.20, 185.40		146.10, 190.50
Sex, n (%)							0.28	-0.02, 0.59	0.067
F	93 (55%)	47%, 62%	53 (62%)	50%, 72%	40 (48%)	37%, 59%
M	77 (45%)	38%, 53%	33 (38%)	28%, 50%	44 (52%)	41%, 63%
Race, n (%)							0.16	-0.14, 0.46	0.705
AMERICAN INDIAN OR ALASKA NATIVE	1 (0.6%)	0.03%, 3.7%	0 (0%)	0.00%, 5.3%	1 (1.2%)	0.06%, 7.4%
BLACK OR AFRICAN AMERICAN	17 (10%)	6.1%, 16%	8 (9.3%)	4.4%, 18%	9 (11%)	5.3%, 20%
WHITE	152 (89%)	84%, 93%	78 (91%)	82%, 96%	74 (88%)	79%, 94%
Ethnicity, n (%)							0.00	-0.30, 0.31	>0.999
HISPANIC OR LATINO	6 (3.5%)	1.4%, 7.9%	3 (3.5%)	0.91%, 11%	3 (3.6%)	0.93%, 11%
NOT HISPANIC OR LATINO	164 (96%)	92%, 99%	83 (97%)	89%, 99%	81 (96%)	89%, 99%
¹ source: produced through continuous and categorical table with Pvalue.r
² CI = Confidence Interval
³ Welch Two Sample t-test; Standardized Mean Difference
⁴ Wilcoxon rank sum test; Pearson’s Chi-squared test; Fisher’s exact test

Table 14-3.02

Code

# Source the R script that contains the create_summary_table function
# source("C:/Users/zunqi/OneDrive/R related/R and AI/functions/continuous and categorical table.r")



# IMPORT DATA
# Read the ADQSCIBC transport (.xpt) file from the CDISC pilot repository.
adqscibc_orig <- haven::read_xpt(
  "https://raw.githubusercontent.com/cdisc-org/sdtm-adam-pilot-project/master/updated-pilot-submission-package/900172/m5/datasets/cdiscpilot01/analysis/adam/datasets/adqscibc.xpt"
)

# SUBSET
# Keep flagged CIBICVAL analysis records at visit number 24 with a non-missing
# value, then derive the analysis variables in a single step.
adqscibc <- adqscibc_orig %>%
  filter(
    EFFFL == "Y",
    ITTFL == "Y",
    PARAMCD == "CIBICVAL",
    ANL01FL == "Y",
    AVISITN == 24,
    !is.na(AVAL)
  ) %>%
  mutate(
    # Fix the arm order (Placebo first) and shorten the display labels.
    TRTP = factor(
      TRTP,
      levels = c("Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"),
      labels = c("Placebo", "Low Dose", "High Dose")
    ),
    # Dichotomise the score: values below 4 are "Low", everything else "High".
    # as.factor() orders the levels alphabetically ("High", "Low").
    # NOTE(review): confirm which level regression_table() treats as the event.
    CIBIC_cat = as.factor(ifelse(AVAL < 4, "Low", "High"))
  )

Set M as the reference level for gender.

Code

# Convert SEX to a factor and make "M" its reference (first) level, so the
# regression tables report "F" relative to "M".
adqscibc$SEX <- relevel(as.factor(adqscibc$SEX), ref = "M")

Code

# Multivariate logistic regression of CIBIC_cat on treatment, age and sex;
# add_n / add_nevent request the N and Event N columns of the rendered table.
# NOTE(review): population is labelled "Full dataset" although adqscibc is a
# filtered subset -- confirm the label.
mulm_table <- regression_table(
  response = CIBIC_cat,
  covariates = c("TRTP", "AGE", "SEX"),
  data = adqscibc,
  regression_type = logistic,
  type = multivariate,
  add_n = TRUE,
  add_nevent = TRUE,
  table_title = "Table 14-3.02: Primary endpoint: multivariate logistic regression",
  population = "Full dataset",
  # Alternative output format: c("flextable").
  output_type = c("tbl_summary")
)

# Print the table.
mulm_table

**Table 14-3.02: Primary endpoint: multivariate logistic regression**
**Full dataset**
Characteristic	N	Event N	OR^1,2	95% CI²	p-value
TRTP	234
Placebo	79	10	—	—
Low Dose	81	15	1.63	0.69, 4.03	0.3
High Dose	74	11	1.12	0.44, 2.89	0.8
Age	234	36	0.97	0.93, 1.02	0.2
SEX	234
M	106	20	—	—
F	128	16	0.63	0.30, 1.29	0.2
¹ Source: produced through simple regression with table.r
² OR = Odds Ratio, CI = Confidence Interval

Code

# several univariate models
uvlm_table<-regression_table(response=CIBIC_cat,covariates=c("TRTP", "AGE", "SEX") , data=adqscibc,
                  regression_type=logistic,
                  type=univariate,
                  add_n=TRUE,
                  add_nevent=TRUE,
                  table_title="Table 14-3.02: Primary endpoint: univariate logistic regression",
                  population="Full dataset",
                  output_type = c("tbl_summary")
                  # output_type = c("flextable")
                 )

uvlm_table

**Table 14-3.02: Primary endpoint: univariate logistic regression**
**Full dataset**
Characteristic	N	Event N	OR^1,2	95% CI²	p-value
TRTP	234
Placebo	79	10	—	—
Low Dose	81	15	1.57	0.66, 3.84	0.3
High Dose	74	11	1.20	0.48, 3.08	0.7
Age	234	36	0.97	0.93, 1.02	0.2
SEX	234
M	106	20	—	—
F	128	16	0.61	0.30, 1.25	0.2
¹ Source: produced through simple regression with table.r
² OR = Odds Ratio, CI = Confidence Interval

Code

# Put the multivariate and univariate logistic tables side by side, each under
# its own spanner header, then attach the combined caption.
merge_table <- tbl_merge(
  tbls = list(mulm_table, uvlm_table),
  tab_spanner = c("multivariate result", "univariate result")
)
merge_table <- modify_caption(
  merge_table,
  "**Table 14-3.02:Multivariate and Univariate Table logistic regression**"
)
merge_table

**Table 14-3.02:Multivariate and Univariate Table logistic regression**
Characteristic	multivariate result					univariate result
Characteristic	N	Event N	OR^1,2	95% CI²	p-value	N	Event N	OR^1,2	95% CI²	p-value
TRTP	234					234
Placebo	79	10	—	—		79	10	—	—
Low Dose	81	15	1.63	0.69, 4.03	0.3	81	15	1.57	0.66, 3.84	0.3
High Dose	74	11	1.12	0.44, 2.89	0.8	74	11	1.20	0.48, 3.08	0.7
Age	234	36	0.97	0.93, 1.02	0.2	234	36	0.97	0.93, 1.02	0.2
SEX	234					234
M	106	20	—	—		106	20	—	—
F	128	16	0.63	0.30, 1.29	0.2	128	16	0.61	0.30, 1.25	0.2
¹ Source: produced through simple regression with table.r
² OR = Odds Ratio, CI = Confidence Interval

Table 14-3.03

Code

# Multivariate linear regression of the score (AVAL) on treatment, age and
# sex. This overwrites the mulm_table object created for Table 14-3.02.
mulm_table <- regression_table(
  response = AVAL,
  covariates = c("TRTP", "AGE", "SEX"),
  data = adqscibc,
  regression_type = linear,
  type = multivariate,
  add_n = TRUE,
  table_title = "Table 14-3.03: Primary endpoint: multivariate linear regression",
  population = "Full dataset",
  # Alternative output format: c("flextable").
  output_type = c("tbl_summary")
)

# Print the table.
mulm_table

**Table 14-3.03: Primary endpoint: multivariate linear regression**
**Full dataset**
Characteristic	N	Beta¹	95% CI²	p-value
TRTP	234
Placebo	79	—	—
Low Dose	81	-0.12	-0.36, 0.13	0.3
High Dose	74	0.05	-0.20, 0.30	0.7
Age	234	0.01	0.00, 0.02	0.083
SEX	234
M	106	—	—
F	128	0.06	-0.14, 0.27	0.5
¹ Source: produced through simple regression with table.r
² CI = Confidence Interval

Code

# several univariate models
uvlm_table<-regression_table(response=AVAL,covariates=c("TRTP", "AGE", "SEX") , data=adqscibc,
                  regression_type=linear,
                  type=univariate,
                  add_n=TRUE,
                  table_title="Table 14-3.03: Primary endpoint: univariate linear regression",
                  population="Full dataset",
                  output_type = c("tbl_summary")
                  # output_type = c("flextable")
                 )

uvlm_table

**Table 14-3.03: Primary endpoint: univariate linear regression**
**Full dataset**
Characteristic	N	Beta¹	95% CI²	p-value
TRTP	234
Placebo	79	—	—
Low Dose	81	-0.11	-0.35, 0.14	0.4
High Dose	74	0.03	-0.22, 0.28	0.8
Age	234	0.01	0.00, 0.02	0.10
SEX	234
M	106	—	—
F	128	0.07	-0.13, 0.27	0.5
¹ Source: produced through simple regression with table.r
² CI = Confidence Interval

Code

# Put the multivariate and univariate linear tables side by side, each under
# its own spanner header, then attach the combined caption.
merge_table <- tbl_merge(
  tbls = list(mulm_table, uvlm_table),
  tab_spanner = c("multivariate result", "univariate result")
)
merge_table <- modify_caption(
  merge_table,
  "**Table 14-3.03:Multivariate and Univariate Table linear regression**"
)
merge_table

**Table 14-3.03:Multivariate and Univariate Table linear regression**
Characteristic	multivariate result				univariate result
Characteristic	N	Beta¹	95% CI²	p-value	N	Beta¹	95% CI²	p-value
TRTP	234				234
Placebo	79	—	—		79	—	—
Low Dose	81	-0.12	-0.36, 0.13	0.3	81	-0.11	-0.35, 0.14	0.4
High Dose	74	0.05	-0.20, 0.30	0.7	74	0.03	-0.22, 0.28	0.8
Age	234	0.01	0.00, 0.02	0.083	234	0.01	0.00, 0.02	0.10
SEX	234				234
M	106	—	—		106	—	—
F	128	0.06	-0.14, 0.27	0.5	128	0.07	-0.13, 0.27	0.5
¹ Source: produced through simple regression with table.r
² CI = Confidence Interval

Table 14-1.41

Code

# Summary of the `trial` example data (treatment, age, grade, response), split
# by treatment arm, with overall, CI, difference and P Value columns.
d <- trial %>%
  dplyr::select(trt, age, grade, response)

# NOTE(review): continuous_digits supplies 8 values for a single continuous
# variable; how create_summary_table() maps them is not visible here -- confirm.
create_summary_table(
  data = d,
  by_var = trt,
  continuous_vars = c("age"),
  categorical_vars = c("grade", "response"),
  var_order = c("age", "grade", "response"),
  continuous_digits = c(0, 1, 2, 1, 1, 1, 2, 2),
  # The label key must match the column name exactly; the column is `age`
  # (lower case), so the previous key `AGE` did not refer to any variable.
  continuous_labels = list(
    age = "Age"
  ),
  categorical_labels = list(
    grade = "Grade",
    response = "Tumor Response"
  ),
  include_overall = TRUE,
  add_p = TRUE,
  add_CI = TRUE,
  # The difference column only works when by_var has exactly two levels.
  add_difference = TRUE,
  table_title = "Table 14-1.41: Summary of cancer information",
  population = "Full dataset",
  # Alternative output formats: c("flextable") or "tibble".
  output_type = c("tbl_summary")
)

**Table 14-1.41: Summary of cancer information**
**Full dataset**
Characteristic	Total (N=200)¹	95% CI²	Drug A N = 98¹	95% CI²	Drug B N = 102¹	95% CI²	Difference³	95% CI^3,2	P Value⁴
Age		45, 49		44, 50		45, 50	-0.44	-4.6, 3.7	0.718
n	189		91		98
Mean (SD)	47.2 (14.31)		47.0 (14.71)		47.4 (14.01)
Median (P25, P75)	47.0 (38.0, 57.0)		46.0 (37.0, 60.0)		48.0 (39.0, 56.0)
Min, Max	6.00, 83.00		6.00, 78.00		9.00, 83.00
Missing	11		7		4
Grade, n (%)							0.07	-0.20, 0.35	0.871
I	68 (34%)	28%, 41%	35 (36%)	26%, 46%	33 (32%)	24%, 42%
II	68 (34%)	28%, 41%	32 (33%)	24%, 43%	36 (35%)	26%, 45%
III	64 (32%)	26%, 39%	31 (32%)	23%, 42%	33 (32%)	24%, 42%
Tumor Response, n (%)							-0.09	-0.37, 0.19	0.530
0	132 (68%)	61%, 75%	67 (71%)	60%, 79%	65 (66%)	56%, 75%
1	61 (32%)	25%, 39%	28 (29%)	21%, 40%	33 (34%)	25%, 44%
Missing	7		3		4
¹ source: produced through continuous and categorical table with Pvalue.r
² CI = Confidence Interval
³ Welch Two Sample t-test; Standardized Mean Difference
⁴ Wilcoxon rank sum test; Pearson’s Chi-squared test

Table 14-1.42

Code

 source("C:/Users/zunqi/OneDrive/R related/R and AI/functions/simple regression with table.r")
# Note: for survival analysis the response is entered differently from the
# other regression types. It is a character vector of two column names; per
# the original note, ttdeath is the time to death and is required. The second
# name, death, is presumably the event indicator -- TODO confirm.

# Multivariate Cox model on the `trial` example data; the rendered table
# reports hazard ratios (HR).
mulm_table <- regression_table(
  response = c("ttdeath", "death"),
  covariates = c("trt", "age", "grade"),
  data = trial,
  regression_type = survival,
  type = multivariate,
  add_n = TRUE,
  table_title = "Table 14-1.42: Survival Analysis, multivariate cox HR regression",
  population = "Full dataset",
  # Alternative output format: c("flextable").
  output_type = c("tbl_summary")
)

# Print the table.
mulm_table

**Table 14-1.42: Survival Analysis, multivariate cox HR regression**
**Full dataset**
Characteristic	N	HR^1,2	95% CI²	p-value
Chemotherapy Treatment	189
Drug A	91	—	—
Drug B	98	1.30	0.88, 1.92	0.2
Age	189	1.01	0.99, 1.02	0.3
Grade	189
I	66	—	—
II	62	1.21	0.73, 1.99	0.5
III	61	1.79	1.12, 2.86	0.014
¹ Source: produced through simple regression with table.r
² HR = Hazard Ratio, CI = Confidence Interval

Code

#for univariate survival analysis type does not matter
regression_table(response=c("ttdeath", "death"),covariates=c("trt") , data=trial,
                  regression_type=survival,
                  type=multivariate,
                  add_n=TRUE,
                                    table_title="Table 14-1.42.1: Survival Analysis, univariate cox HR regression",
                  population="Full dataset",
                  output_type = c("tbl_summary")
                  # output_type = c("flextable")
                 )

**Table 14-1.42.1: Survival Analysis, univariate cox HR regression**
**Full dataset**
Characteristic	N	HR^1,2	95% CI²	p-value
Chemotherapy Treatment	200
Drug A	98	—	—
Drug B	102	1.25	0.86, 1.81	0.2
¹ Source: produced through simple regression with table.r
² HR = Hazard Ratio, CI = Confidence Interval