1 Load necessary library

if (!require("pacman")) install.packages("pacman")
  pacman::p_load(ggplot2, tidyr, dplyr, tidyverse, knitr, finalfit, here, DT)

2 Load Data

data(colon_s)
datatable(
  head(colon_s, 50),  # First 50 records
  filter = "top",     # Column filters
  options = list(
    scrollX = TRUE,   # Horizontal scrollbar
    pageLength = 20,  # Default rows per page
    lengthMenu = list(c(10, 20, 50, 100, -1), 
                      c("10", "20", "50", "100", "All")),  # Custom menu labels
    autoWidth = TRUE  # Adjust column widths
  ),
  rownames = FALSE
)

3 Demographic Table

# Specify explanatory variables of interest
explanatory <- c("age", "sex.factor", 
                "extent.factor", "obstruct.factor", 
                "nodes")

colon_s %>% 
  summary_factorlist("differ.factor", explanatory,
                     p=TRUE, na_include=TRUE)
##             label              levels        Well    Moderate        Poor
##       Age (years)           Mean (SD) 60.2 (12.8) 59.9 (11.7) 59.0 (12.8)
##               Sex              Female   51 (54.8)  314 (47.4)   73 (48.7)
##                                  Male   42 (45.2)  349 (52.6)   77 (51.3)
##                             (Missing)     0 (0.0)     0 (0.0)     0 (0.0)
##  Extent of spread           Submucosa     5 (5.4)    12 (1.8)     3 (2.0)
##                                Muscle   12 (12.9)   78 (11.8)    12 (8.0)
##                                Serosa   76 (81.7)  542 (81.7)  127 (84.7)
##                   Adjacent structures     0 (0.0)    31 (4.7)     8 (5.3)
##                             (Missing)     0 (0.0)     0 (0.0)     0 (0.0)
##       Obstruction                  No   69 (74.2)  531 (80.1)  114 (76.0)
##                                   Yes   19 (20.4)  122 (18.4)   31 (20.7)
##                             (Missing)     5 (5.4)    10 (1.5)     5 (3.3)
##             nodes           Mean (SD)   2.7 (2.2)   3.6 (3.4)   4.7 (4.4)
##       p
##   0.644
##   0.400
##        
##        
##   0.081
##        
##        
##        
##        
##   0.655
##        
##        
##  <0.001

4 Generate Final Table

colon_s <- colon_s %>% 
  mutate(
    nodes = ff_label(nodes, "Lymph nodes involved")
    )

table1 <- colon_s %>%  
  summary_factorlist("differ.factor", explanatory, 
                     p=TRUE, na_include=TRUE, 
                     add_dependent_label=TRUE,
                     dependent_label_prefix = "Exposure: "
                     )
table1
##  Exposure: Differentiation                            Well    Moderate
##                Age (years)           Mean (SD) 60.2 (12.8) 59.9 (11.7)
##                        Sex              Female   51 (54.8)  314 (47.4)
##                                           Male   42 (45.2)  349 (52.6)
##                                      (Missing)     0 (0.0)     0 (0.0)
##           Extent of spread           Submucosa     5 (5.4)    12 (1.8)
##                                         Muscle   12 (12.9)   78 (11.8)
##                                         Serosa   76 (81.7)  542 (81.7)
##                            Adjacent structures     0 (0.0)    31 (4.7)
##                                      (Missing)     0 (0.0)     0 (0.0)
##                Obstruction                  No   69 (74.2)  531 (80.1)
##                                            Yes   19 (20.4)  122 (18.4)
##                                      (Missing)     5 (5.4)    10 (1.5)
##       Lymph nodes involved           Mean (SD)   2.7 (2.2)   3.6 (3.4)
##         Poor      p
##  59.0 (12.8)  0.644
##    73 (48.7)  0.400
##    77 (51.3)       
##      0 (0.0)       
##      3 (2.0)  0.081
##     12 (8.0)       
##   127 (84.7)       
##      8 (5.3)       
##      0 (0.0)       
##   114 (76.0)  0.655
##    31 (20.7)       
##      5 (3.3)       
##    4.7 (4.4) <0.001

5 Logistic Regression Table

explanatory <- c( "differ.factor", "age", "sex.factor", 
                "extent.factor", "obstruct.factor", 
                "nodes")
dependent <- "mort_5yr"
table2 <- colon_s %>% 
  finalfit(dependent, explanatory, 
           dependent_label_prefix = "")
table2
##      Mortality 5 year                           Alive        Died
##       Differentiation                Well   52 (56.5)   40 (43.5)
##                                  Moderate  382 (58.7)  269 (41.3)
##                                      Poor   63 (42.3)   86 (57.7)
##           Age (years)           Mean (SD) 59.8 (11.4) 59.9 (12.5)
##                   Sex              Female  243 (55.6)  194 (44.4)
##                                      Male  268 (56.1)  210 (43.9)
##      Extent of spread           Submucosa   16 (80.0)    4 (20.0)
##                                    Muscle   78 (75.7)   25 (24.3)
##                                    Serosa  401 (53.5)  349 (46.5)
##                       Adjacent structures   16 (38.1)   26 (61.9)
##           Obstruction                  No  408 (56.7)  312 (43.3)
##                                       Yes   89 (51.1)   85 (48.9)
##  Lymph nodes involved           Mean (SD)   2.7 (2.4)   4.9 (4.4)
##            OR (univariable)         OR (multivariable)
##                           -                          -
##   0.92 (0.59-1.43, p=0.694)  0.62 (0.38-1.01, p=0.054)
##   1.77 (1.05-3.01, p=0.032)  1.00 (0.56-1.78, p=0.988)
##   1.00 (0.99-1.01, p=0.986)  1.01 (1.00-1.02, p=0.098)
##                           -                          -
##   0.98 (0.76-1.27, p=0.889)  0.97 (0.73-1.30, p=0.858)
##                           -                          -
##   1.28 (0.42-4.79, p=0.681)  1.25 (0.36-5.87, p=0.742)
##  3.48 (1.26-12.24, p=0.027) 3.03 (0.96-13.36, p=0.087)
##  6.50 (1.98-25.93, p=0.004) 6.80 (1.75-34.55, p=0.010)
##                           -                          -
##   1.25 (0.90-1.74, p=0.189)  1.26 (0.88-1.82, p=0.206)
##   1.24 (1.18-1.30, p<0.001)  1.24 (1.18-1.31, p<0.001)

6 Odds Ratio Plot

colon_s %>% 
  or_plot(dependent, explanatory, 
          breaks = c(0.5, 1, 5, 10, 20, 30),
          table_text_size = 2.5)

7 Save objects for knitr/markdown

# creat folder
dir.create(here::here("data"), showWarnings = FALSE, recursive = TRUE)

# Verify the path
file_path <- here::here("data", "out.rda")
print(file_path)
## [1] "D:/OneDrive/ncid/GitHub/code-reference/data/out.rda"
# Save the objects
save(table1, table2, dependent, explanatory, file = file_path)

7.1 Load Data

load(here::here("data", "out.rda"))

7.2 Table 1 - Demographics

kable(table1, row.names=FALSE, align=c("l", "l", "r", "r", "r", "r"))
Exposure: Differentiation Well Moderate Poor p
Age (years) Mean (SD) 60.2 (12.8) 59.9 (11.7) 59.0 (12.8) 0.644
Sex Female 51 (54.8) 314 (47.4) 73 (48.7) 0.400
Male 42 (45.2) 349 (52.6) 77 (51.3)
(Missing) 0 (0.0) 0 (0.0) 0 (0.0)
Extent of spread Submucosa 5 (5.4) 12 (1.8) 3 (2.0) 0.081
Muscle 12 (12.9) 78 (11.8) 12 (8.0)
Serosa 76 (81.7) 542 (81.7) 127 (84.7)
Adjacent structures 0 (0.0) 31 (4.7) 8 (5.3)
(Missing) 0 (0.0) 0 (0.0) 0 (0.0)
Obstruction No 69 (74.2) 531 (80.1) 114 (76.0) 0.655
Yes 19 (20.4) 122 (18.4) 31 (20.7)
(Missing) 5 (5.4) 10 (1.5) 5 (3.3)
Lymph nodes involved Mean (SD) 2.7 (2.2) 3.6 (3.4) 4.7 (4.4) <0.001

7.3 Table 2 - Association between tumour factors and 5-year mortality

kable(table2, row.names=FALSE, align=c("l", "l", "r", "r", "r", "r"))
Mortality 5 year Alive Died OR (univariable) OR (multivariable)
Differentiation Well 52 (56.5) 40 (43.5) - -
Moderate 382 (58.7) 269 (41.3) 0.92 (0.59-1.43, p=0.694) 0.62 (0.38-1.01, p=0.054)
Poor 63 (42.3) 86 (57.7) 1.77 (1.05-3.01, p=0.032) 1.00 (0.56-1.78, p=0.988)
Age (years) Mean (SD) 59.8 (11.4) 59.9 (12.5) 1.00 (0.99-1.01, p=0.986) 1.01 (1.00-1.02, p=0.098)
Sex Female 243 (55.6) 194 (44.4) - -
Male 268 (56.1) 210 (43.9) 0.98 (0.76-1.27, p=0.889) 0.97 (0.73-1.30, p=0.858)
Extent of spread Submucosa 16 (80.0) 4 (20.0) - -
Muscle 78 (75.7) 25 (24.3) 1.28 (0.42-4.79, p=0.681) 1.25 (0.36-5.87, p=0.742)
Serosa 401 (53.5) 349 (46.5) 3.48 (1.26-12.24, p=0.027) 3.03 (0.96-13.36, p=0.087)
Adjacent structures 16 (38.1) 26 (61.9) 6.50 (1.98-25.93, p=0.004) 6.80 (1.75-34.55, p=0.010)
Obstruction No 408 (56.7) 312 (43.3) - -
Yes 89 (51.1) 85 (48.9) 1.25 (0.90-1.74, p=0.189) 1.26 (0.88-1.82, p=0.206)
Lymph nodes involved Mean (SD) 2.7 (2.4) 4.9 (4.4) 1.24 (1.18-1.30, p<0.001) 1.24 (1.18-1.31, p<0.001)

7.4 Figure 1 - Association between tumour factors and 5-year mortality

explanatory = c( "differ.factor", "age", "sex.factor", 
                "extent.factor", "obstruct.factor", 
                "nodes")
dependent = "mort_5yr"
colon_s %>% 
  or_plot(dependent, explanatory, breaks = c(0.5, 1, 5, 10, 20, 30),
          table_text_size = 2.5)