get data

Download the full code of this paper and upload it as an .Rmd file to your R environment: https://tinyurl.com/onePagePaper

Download the data and upload it to the same directory as this file. MAKE SURE TO DOWNLOAD IT AS CSV, not XLSX: https://tinyurl.com/RHDS1

Download the myScripts.R file and upload it to the same directory as this file: https://drive.google.com/file/d/1DnTfoMEC5lHek-QMZD1e-vZyfqtAC6di/view?usp=share_link

Use csv file to get all in text

# Read the raw export; the CSV must sit in the same directory as this file.
ds <- read.csv("Hodgkin.csv")

cat("Dimensions of data before cropping")
## Dimensions of data before cropping
dim(ds)
## [1] 999  29
# View(ds)

# Keep only the first 285 rows and the first 27 columns of the raw table.
ds <- ds[seq_len(285), seq_len(27)]


cat("Dimensions of data after cropping")
## Dimensions of data after cropping
dim(ds)
## [1] 285  27
sapply(ds, class)   # classes of all columns
##                Date.of.Diagnosis                        CR.timing 
##                      "character"                      "character" 
##                CR_After_2_cycles                        Age.at.DX 
##                        "integer"                        "numeric" 
##                     Chemotherapy                    Regimen.at.Dx 
##                      "character"                        "integer" 
##           Total.number.of.cycles           Total.Number.after.R.R 
##                        "integer"                        "integer" 
##                              BMT                        Radiation 
##                      "character"                      "character" 
## Radiation.with.primary.treatment                       Risk.Group 
##                        "integer"                      "character" 
##                            Stage                             Mets 
##                      "character"                      "character" 
##                        B.Symptom                            Bulky 
##                      "character"                      "character" 
##                      Nationality                          Outcome 
##                      "character"                      "character" 
##              Relapse.Progression      Date.of.Last.FU.or.rellapse 
##                      "character"                      "character" 
##                             Died         Date.of.death.or.last.FU 
##                        "integer"                      "character" 
##           Event..date.of.relapse                   Last.Follow.up 
##                      "character"                      "character" 
##                        Histology                           Weight 
##                      "character"                        "numeric" 
##                           Height 
##                        "numeric"
# Normalise the column names with janitor (lower case, underscores instead of
# dots/spaces), e.g. "Age.at.DX" becomes "age_at_dx".
names(ds) <- janitor::make_clean_names(names(ds))

head(ds)  # peek at the first rows
date_of_diagnosiscr_timingcr_after_2_cyclesage_at_dxchemotherapyregimen_at_dxtotal_number_of_cyclestotal_number_after_r_rbmtradiationradiation_with_primary_treatmentrisk_groupstagemetsb_symptombulkynationalityoutcomerelapse_progressiondate_of_last_fu_or_rellapsedieddate_of_death_or_last_fuevent_date_of_relapselast_follow_uphistologyweightheight
1/1/2004CR after 2nd ABVD215  4 ABVD140NoNo0LRIINoNoNoJordanianAlive in CRNo27/07/2007027/07/2007No27/07/2007MC42167
1/3/2004CR after 2nd ABVD214  5 ABVD+ 1 COPP260NoNo0HRIV NoNoNoJordanianAlive in CRNo15/05/2011015/05/2011No15/05/2011NS40161
1/6/2004CR after 2nd ABVD27  4 ABVD140NoNo0LRII NoNoNoNonAlive in CRNo28/07/2004028/07/2004No28/07/2004NS26119
1/6/2004PR after 2 ABVD, CR after 4 ABVD17  4 ABVD + 2 COPP260NoYes1HRIV YesYesNoJordanianAlive in CRNo25/03/2012025/03/2012No25/03/2012NS26119
1/6/2004PR after 2 ABVD, CR after 4 ABVD116  4 ABVD + 2 COPP260NoYes1HRIV YesYesNoJordanianAlive in CRYes24/04/2011024/04/2011Yes 02/01/200724/04/2011NS44173
1/7/2004CR after 2nd ABVD212.54 ABVD140NoNo0LRIINoNoNoJordanianAlive in CRNo01/07/201001/07/201No01/07/201NS37152

look at all data before cleaning

# Positions of the columns whose names look like dates or identifiers; these
# are left out of the summary table.
exclude <- which(str_detect(colnames(ds), "Date|date|Do|Dte|mrn|name|last"))

# Show the missing-data column, but hide the graph and valid-count columns.
st_options(dfSummary.na.col = TRUE, dfSummary.graph.col = FALSE, dfSummary.valid.col = FALSE)

# The original call carried a stray empty argument (", ,") after varnumbers;
# it is removed here, all other arguments are unchanged.
dfSummary(ds[,-exclude], round.digits = 2, style = "grid", justify = "left",
          plain.ascii = TRUE, append = FALSE, varnumbers = FALSE,
          max.distinct.values = 10, trim.strings = FALSE,
          max.string.width = 20, split.cells = 40, escape.pipe = TRUE)
VariableStats / ValuesFreqs (% of Valid)Missing
cr_timing\
[character]
1\. CR post 2ABVD\
2\. CR post 4 BEACOPP\
3\. CR after 2nd ABVD\
4\. CR post 2 ABVD\
5\. CR post 4 BEACOP\
6\. CR post 2 ABVD·\
7\. PR after 2 ABVD, CR \
8\. CR AFTER 4 BEACOP·\
9\. CR after 2 ABVD\
10\. CR AFTER 2 ABVD·\
[ 30 others ]
\85 (29.8%)\
\67 (23.5%)\
\35 (12.3%)\
\24 ( 8.4%)\
\12 ( 4.2%)\
\ 9 ( 3.2%)\
\ 9 ( 3.2%)\
\ 4 ( 1.4%)\
\ 3 ( 1.1%)\
\ 3 ( 1.1%)\
\34 (11.9%)
0\
(0.0%)
cr_after_2_cycles\
[integer]
Mean (sd) : 1.85 (0.46)\
min < med < max:\
0 < 2 < 2\
IQR (CV) : 0 (0.25)
0 : 12 ( 4.2%)\
1 : 18 ( 6.3%)\
2 : 255 (89.5%)
0\
(0.0%)
age_at_dx\
[numeric]
Mean (sd) : 11.69 (4.55)\
min < med < max:\
2 < 12.62 < 18\
IQR (CV) : 8.5 (0.39)
83 distinct values1\
(0.4%)
chemotherapy\
[character]
1\. BEACOPP\
2\. 4 ABVD\
3\. 4 ABVD + 2 COPP\
4\. 3 ABVD\
5\. 3 ABVD, 2 COPDac\
6\. 4 ABVD, 2 COPP\
7\. 4 ABVD, 2 COPDac\
8\. BEACOPP·\
9\. 4 ABVD·\
10\. 4 ABVD, 2 COPdac\
[ 36 others ]
\77 (27.0%)\
\60 (21.1%)\
\40 (14.0%)\
\18 ( 6.3%)\
\12 ( 4.2%)\
\11 ( 3.9%)\
\ 7 ( 2.5%)\
\ 7 ( 2.5%)\
\ 5 ( 1.8%)\
\ 4 ( 1.4%)\
\44 (15.4%)
0\
(0.0%)
regimen_at_dx\
[integer]
Mean (sd) : 1.99 (0.81)\
min < med < max:\
1 < 2 < 3\
IQR (CV) : 2 (0.41)
1 : 95 (33.3%)\
2 : 99 (34.7%)\
3 : 91 (31.9%)
0\
(0.0%)
total_number_of_cycles\
[integer]
Mean (sd) : 5.67 (1.6)\
min < med < max:\
3 < 6 < 11\
IQR (CV) : 2 (0.28)
3 : 21 ( 7.4%)\
4 : 70 (24.6%)\
5 : 10 ( 3.5%)\
6 : 129 (45.3%)\
7 : 1 ( 0.4%)\
8 : 49 (17.2%)\
9 : 1 ( 0.4%)\
10 : 3 ( 1.1%)\
11 : 1 ( 0.4%)
0\
(0.0%)
total_number_after_r_r\
[integer]
Mean (sd) : 0.15 (0.67)\
min < med < max:\
0 < 0 < 5\
IQR (CV) : 0 (4.55)
0 : 270 (94.7%)\
2 : 9 ( 3.2%)\
3 : 1 ( 0.4%)\
4 : 4 ( 1.4%)\
5 : 1 ( 0.4%)
0\
(0.0%)
bmt\
[character]
1\. No\
2\. Yes
\274 (96.1%)\
\ 11 ( 3.9%)
0\
(0.0%)
radiation\
[character]
1\. no\
2\. No\
3\. No·\
4\. TBI at relapse\
5\. Yes\
6\. yes·\
7\. Yes (at relapse)\
8\. Yes at relaps)\
9\. Yes at relapse\
10\. Yes at replase·\
11\. Yes, BONE
\ 1 ( 0.4%)\
\210 (73.7%)\
\ 2 ( 0.7%)\
\ 1 ( 0.4%)\
\ 55 (19.3%)\
\ 7 ( 2.5%)\
\ 1 ( 0.4%)\
\ 1 ( 0.4%)\
\ 3 ( 1.1%)\
\ 3 ( 1.1%)\
\ 1 ( 0.4%)
0\
(0.0%)
radiation_with_primary_treatment\
[integer]
Mean (sd) : 0.32 (0.61)\
min < med < max:\
0 < 0 < 3\
IQR (CV) : 1 (1.93)
0 : 213 (74.7%)\
1 : 58 (20.4%)\
2 : 10 ( 3.5%)\
3 : 4 ( 1.4%)
0\
(0.0%)
risk_group\
[character]
1\. HR\
2\. HR·\
3\. HR·\
4\. IR\
5\. IR·\
6\. LR\
7\. LR·
\120 (42.1%)\
\ 16 ( 5.6%)\
\ 1 ( 0.4%)\
\ 52 (18.2%)\
\ 5 ( 1.8%)\
\ 88 (30.9%)\
\ 3 ( 1.1%)
0\
(0.0%)
stage\
[character]
1\. II\
2\. II·\
3\. IV\
4\. IIIS\
5\. III\
6\. IV·\
7\. I\
8\. III·\
9\. IV·\
10\. I·\
[ 3 others ]
\94 (33.0%)\
\46 (16.1%)\
\32 (11.2%)\
\26 ( 9.1%)\
\21 ( 7.4%)\
\19 ( 6.7%)\
\16 ( 5.6%)\
\15 ( 5.3%)\
\ 6 ( 2.1%)\
\ 4 ( 1.4%)\
\ 6 ( 2.1%)
0\
(0.0%)
mets\
[character]
1\. No\
2\. yes\
3\. Yes\
4\. YES·
\228 (80.0%)\
\ 1 ( 0.4%)\
\ 55 (19.3%)\
\ 1 ( 0.4%)
0\
(0.0%)
b_symptom\
[character]
1\. No\
2\. yes\
3\. Yes
\153 (53.7%)\
\ 2 ( 0.7%)\
\130 (45.6%)
0\
(0.0%)
bulky\
[character]
1\. No\
2\. yes\
3\. Yes
\177 (62.1%)\
\ 1 ( 0.4%)\
\107 (37.5%)
0\
(0.0%)
nationality\
[character]
1\. Jordanian\
2\. Kuwati\
3\. Non\
4\. Syrian
\251 (88.1%)\
\ 1 ( 0.4%)\
\ 32 (11.2%)\
\ 1 ( 0.4%)
0\
(0.0%)
outcome\
[character]
1\. Alive in CR\
2\. Alive in CR\
3\. Alive Off Therapy in\
4\. AWD\
5\. Died\
6\. Died with AML\
7\. Died in CR/ Sepsis\
8\. died with disease\
9\. Died with disease\
10\. Died with sepsis\
11\. Lost to followup
\201 (70.5%)\
\ 62 (21.8%)\
\ 8 ( 2.8%)\
\ 1 ( 0.4%)\
\ 1 ( 0.4%)\
\ 1 ( 0.4%)\
\ 1 ( 0.4%)\
\ 4 ( 1.4%)\
\ 1 ( 0.4%)\
\ 1 ( 0.4%)\
\ 4 ( 1.4%)
0\
(0.0%)
relapse_progression\
[character]
1\. no\
2\. No\
3\. Yes
\193 (67.7%)\
\ 67 (23.5%)\
\ 25 ( 8.8%)
0\
(0.0%)
died\
[integer]
Min : 0\
Mean : 0.03\
Max : 1
0 : 277 (97.2%)\
1 : 8 ( 2.8%)
0\
(0.0%)
histology\
[character]
1\. (Empty string)\
2\. HD\
3\. Interfolicular\
4\. Interfollicular\
5\. LP\
6\. LR\
7\. Lymphocyte Rich\
8\. MC\
9\. NS
\ 2 ( 0.8%)\
\ 1 ( 0.4%)\
\ 1 ( 0.4%)\
\ 2 ( 0.8%)\
\ 7 ( 2.7%)\
\ 4 ( 1.5%)\
\ 1 ( 0.4%)\
\ 73 (28.1%)\
\169 (65.0%)
25\
(8.8%)
weight\
[numeric]
Mean (sd) : 35.29 (9.19)\
min < med < max:\
12 < 37 < 48\
IQR (CV) : 17 (0.26)
84 distinct values0\
(0.0%)
height\
[numeric]
Mean (sd) : 146.88 (27.57)\
min < med < max:\
77 < 152 < 185\
IQR (CV) : 51 (0.19)
84 distinct values0\
(0.0%)

Look at each column's unique values to understand how messy your data is

# Temporary data set holding only the character columns, minus the date/ID
# columns indexed by `exclude`.
# all_of() tells tidyselect that `exclude` is an external vector of column
# positions, not a column of `ds`.
DS <- ds %>%
  select(-all_of(exclude)) %>%
  select(where(is.character))

lapply(DS, unique)              # unique values in each column of DS
## $cr_timing
##  [1] "CR after 2nd ABVD"                                
##  [2] "PR after 2 ABVD, CR after 4 ABVD"                 
##  [3] "CR after 2nd ABVD, CR after 2 COPP"               
##  [4] "CR after 2 ABVD"                                  
##  [5] "CR after 2 ABVD, CR post  AlloBMT"                
##  [6] "PR after 2nd ABVD"                                
##  [7] "PR after 2 ABVD, CR after 4 ABVD, 2 ICE"          
##  [8] "PR after 2 ABVD"                                  
##  [9] "CR post 2ABVD"                                    
## [10] "PR post2ABVD"                                     
## [11] "CR post 4 BEACOPP"                                
## [12] "PR post 2ABVD, CR post 4 ABVD"                    
## [13] "PR post 2ABVD, PR post 4 ABVD"                    
## [14] "CR post 2ABVD, CR post 2 ICE and ABMT"            
## [15] "CR post 2 ABVD"                                   
## [16] "CR post 2 ABVD, then Post 2 COPDac"               
## [17] "PR post 4 BEACOPP"                                
## [18] "CR post 2ABVD, CR post 2 COPP"                    
## [19] "PR post 2ABVD"                                    
## [20] "CR AFTER 4 BEACOP "                               
## [21] "CR AFTER 2 ABVD , 2 post GV, ICE"                 
## [22] "CR AFTER2 ABVD "                                  
## [23] "CR AFTER 2 ABVD "                                 
## [24] "CR AFTER 4 BEACOP"                                
## [25] "PR post 2 ABVD"                                   
## [26] "CR post 4 BEACOPP, CR post IGEV"                  
## [27] "CR post 2 ABVD, CR post ICE and ABMT"             
## [28] "CR post 3rd regimen"                              
## [29] "CRpost 2 ABVD"                                    
## [30] "CR after all chemo"                               
## [31] "CRpostr 4 BEACOPP"                                
## [32] "CR post 3 BEACOPP and ABVD"                       
## [33] "CR after 5 BEACOPP, 2ICE"                         
## [34] "CR post 2ABVD, then post ICE , then post Keytruda"
## [35] "CR post 4 BEACOP"                                 
## [36] "CR post 2 ABVD "                                  
## [37] "PR post 4 BEACOP"                                 
## [38] "PR post 2 ABVD "                                  
## [39] "PR post 2ABVD, CR  post 5 cycles"                 
## [40] "PR post 2ABVD, PR post 4 ABVD, CR post 2 ICE"     
## 
## $chemotherapy
##  [1] "4 ABVD"                                               
##  [2] "5 ABVD+ 1 COPP"                                       
##  [3] "4 ABVD + 2 COPP"                                      
##  [4] "4 ABVD then 4 COPP"                                   
##  [5] "4 ABVD + 2 COPP, 2 ICE , ABMT, AlloBMT"               
##  [6] "4 ABVD+ 2 COPP"                                       
##  [7] "4 ABVD "                                              
##  [8] "4ABVD, 2COPP, ICE, ABMT"                              
##  [9] "3 ABVD"                                               
## [10] "4 ABVD+ 2 COPP, 2nd line"                             
## [11] "4ABVD, 2COPP, ICE, ABMT, GV, DHAP XRT"                
## [12] "4ABVD, 2COPP, ICE"                                    
## [13] "4ABVD "                                               
## [14] "BEACOPP"                                              
## [15] "4 ABVD, 2 ICE"                                        
## [16] "4 ABVD + 2 COPdac"                                    
## [17] "5 ABVD + 2 COPdac"                                    
## [18] "4 ABVD, 4 COPDac"                                     
## [19] "4 ABVD, 2 COPP"                                       
## [20] "4ABVD"                                                
## [21] "4 ABVD, then 4 COPP"                                  
## [22] "4 ABVD, 2 COPdac"                                     
## [23] "5 ABVD"                                               
## [24] "4 ABVD, 2 COPDac"                                     
## [25] "4 ABVD, 2 COPDac, 3 ICE, 2 GV, ABMT"                  
## [26] "4 ABVD, 2 COPDac, 2 ICE,  ABMT"                       
## [27] "4 ABVD, 2COPDac, 2 ICE"                               
## [28] "3 ABVD, (3RICE at relapse)"                           
## [29] "3 ABVD "                                              
## [30] "4 BEACOP, 2COPP/ABV, 2 ICE"                           
## [31] "3 ABVD, (4COPDac at relapse)"                         
## [32] "3 ABVD, 2 COPDac"                                     
## [33] "BEACOPP (IGEV at relape)"                             
## [34] " BEACOPP"                                             
## [35] "4 ABVD, 2COPDac"                                      
## [36] "5 BEACOPP, 2ICE, Auto BMT"                            
## [37] "4 ABVD, 2COPDAC , 2 ICE, 2 GV Bend(auto and allo BMT)"
## [38] "4 ABVD, 2COPP"                                        
## [39] " BEACOPP, IGEV, keytrauda, auto BMT after replase)"   
## [40] "Chemo, ABMT, AlloBMT"                                 
## [41] "3 ABVD, 2COPP"                                        
## [42] "BEACOPP "                                             
## [43] "3 ABVD, 2 COPP"                                       
## [44] "3 ABVD, 1 COPDac"                                     
## [45] "BEACOPP/ICE ABMT"                                     
## [46] "3 ABVD, 4 COPP, 2 ICE , XRT"                          
## 
## $bmt
## [1] "No"  "Yes"
## 
## $radiation
##  [1] "No"               "Yes"              "Yes at replase "  "no"              
##  [5] "Yes, BONE"        "Yes at relapse"   "Yes at relaps)"   "Yes (at relapse)"
##  [9] "TBI at relapse"   "No "              "yes "            
## 
## $risk_group
## [1] "LR"   "HR"   "HR  " "IR"   "HR "  "IR "  "LR " 
## 
## $stage
##  [1] "II"   "IV "  "II "  "IV  " "I"    "III " "IV"   "I "   "III"  "IIIS"
## [11] "IVS"  "II  " ""    
## 
## $mets
## [1] "No"   "Yes"  "yes"  "YES "
## 
## $b_symptom
## [1] "No"  "Yes" "yes"
## 
## $bulky
## [1] "No"  "Yes" "yes"
## 
## $nationality
## [1] "Jordanian" "Non"       "Kuwati"    "Syrian"   
## 
## $outcome
##  [1] "Alive in CR"             "Alive  in CR"           
##  [3] "Alive Off Therapy in CR" "died with disease"      
##  [5] "Died with sepsis"        "Lost to followup"       
##  [7] "Died  with AML"          "Died with disease"      
##  [9] "AWD"                     "Died"                   
## [11] "Died in CR/ Sepsis"     
## 
## $relapse_progression
## [1] "No"  "Yes" "no" 
## 
## $histology
##  [1] "MC"              "NS"              NA                "LP"             
##  [5] "Interfolicular"  "Interfollicular" "Lymphocyte Rich" "LR"             
##  [9] "HD"              ""
rm(list = "DS")  # drop the temporary data set; it is not used again

Data cleaning

# Tabulate the raw stage values to show the duplicates caused by stray spaces.
cat("Stages before cleaning\n")
## Stages before cleaning
count(ds, stage)
stagen
1
I16
I 4
II94
II 46
II 2
III21
III 15
IIIS26
IV32
IV 19
IV 6
IVS3
# Standardise every character column in one pass: upper-case the text, then
# squish it. str_squish trims both ends (" V " becomes "V") and also collapses
# repeated internal spaces, so "ALIVE  IN CR" and "ALIVE IN CR" become one value.
ds <- ds %>%
  mutate(across(
    where(is.character),
    function(x) stringr::str_squish(stringr::str_to_upper(x))
  ))

cat("Stages after cleaning\n")
## Stages after cleaning
count(ds, stage)
stagen
1
I20
II142
III36
IIIS26
IV57
IVS3

clean the column of timing to CR by creating 2 columns

# Derive two columns from the free-text cr_timing column:
#   best_response       - "CR" when the text mentions CR anywhere, otherwise "PR"
#   best_response_after - the first run of digits found in the text
# NOTE(review): for text such as "PR AFTER 2 ABVD, CR AFTER 4 ABVD" this gives
# best_response "CR" with best_response_after "2" - confirm that taking the
# first number is intended.
ds <- ds %>%
  mutate(
    best_response = if_else(str_detect(cr_timing, "CR"), "CR", "PR"),
    best_response_after = str_extract(cr_timing, "(\\d)+")
  )

# count(ds, best_response_after)

clean histology

# Tabulate histology values before recoding.
cat("Histology before cleaning\n")
## Histology before cleaning
count(ds, histology)
histologyn
2
HD1
INTERFOLICULAR1
INTERFOLLICULAR2
LP7
LR4
LYMPHOCYTE RICH1
MC73
NS169
25
# Merge the spelling variants of the histology labels.
ds <- ds %>%
  mutate(histology = recode(
    histology,
    "INTERFOLICULAR" = "Int",
    "INTERFOLLICULAR" = "Int",
    "LYMPHOCYTE RICH" = "LR"
  ))
# Turn empty strings into NA in every column of the data frame, not only in
# histology.
ds[ds == ""] <- NA

cat("Histology after cleaning\n")
## Histology after cleaning
count(ds, histology)
histologyn
HD1
Int3
LP7
LR5
MC73
NS169
27
# unique(ds$histology)

clean the radiation column

# Tabulate radiation values before collapsing them to YES/NO.
cat("Radiation before cleaning\n")
## Radiation before cleaning
count(ds, radiation)
radiationn
NO213
TBI AT RELAPSE1
YES62
YES (AT RELAPSE)1
YES AT RELAPS)1
YES AT RELAPSE3
YES AT REPLASE3
YES, BONE1
# Collapse radiation to two levels. Only "YES" and "YES, BONE" stay "YES";
# every other value, including the at-relapse and TBI entries, becomes "NO".
ds <- ds %>%
  mutate(
    radiation = if_else(radiation %in% c("YES", "YES, BONE"), "YES", "NO")
  )


cat("Radiation after cleaning\n")
## Radiation after cleaning
count(ds, radiation)
radiationn
NO222
YES63

clean the dates

If some dates are text you may get an error here. Go and check your dates

# Convert every column whose name starts with "date" to a date-time.
# Columns such as event_date_of_relapse and last_follow_up do not start with
# "date", so they are not converted here.
# NOTE(review): fix_date() is not defined in the visible code; per the comment
# below it comes from myScripts.R, so presumably that file has to be sourced
# before this point - TODO confirm.
ds<- ds %>% 
  mutate(across(starts_with("date"), function(x) lubridate::as_datetime(sapply(x, fix_date))))  # apply fix date function from myScripts.R which should fix any date

# List, per date column, the row positions that are NA or equal to the
# "9999-09-09" placeholder so the source data can be corrected.
cat("Which dates are NA to fix the original data\n")
## Which dates are NA to fix the original data
lapply(ds%>% select(starts_with("date")) , function(x) which(is.na(x)|x=="9999-09-09")) # this is the value returned by fix_date for NA dates
## $date_of_diagnosis
## 2014/011/25 
##         184 
## 
## $date_of_last_fu_or_rellapse
## 01/07/201 
##         6 
## 
## $date_of_death_or_last_fu
## 01/07/201 
##         6

create dead and event columns

# A patient counts as dead when the outcome text contains "DIED".
ds$dead <- str_detect(ds$outcome, "DIED")
# An event is a relapse/progression or a death. The comparison already yields
# a logical vector, so no ifelse(..., T, F) wrapper is needed.
ds$event <- ds$relapse_progression == "YES" | ds$dead

calculate OS and EFS

# Overall survival (OS) and event-free survival (EFS), both measured from the
# date of diagnosis and expressed in months of 30 days.
# The units are pinned to days: subtracting two date-times picks the unit
# automatically from the smallest gap, so a single same-day follow-up would
# otherwise make as.numeric() return seconds for every patient.
ds <- ds %>%
  mutate(
    OS = as.numeric(difftime(date_of_death_or_last_fu, date_of_diagnosis, units = "days")) / 30,
    EFS = as.numeric(difftime(date_of_last_fu_or_rellapse, date_of_diagnosis, units = "days")) / 30
  )

calculate median follow up

This is defined as the median duration of follow up from diagnosis to last follow up or death for patients who did not die

# Distribution of follow-up time (OS, in months) among patients who did not die.
cat("Median FU in months=")
## Median FU in months=
ds %>%
  filter(!dead) %>%
  pull(OS) %>%
  summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   -7.90   25.32   48.42   58.80   85.51  179.57       2

table 1

# Table 1: patient characteristics compared between CR and PR patients.
ds %>%
  select(histology, stage, bulky, b_symptom, mets, radiation, best_response,
         best_response_after, dead, event, bmt) %>%
  tbl_summary(
    by = best_response,  # one column per best response (CR vs PR)
    # Row labels. `mets` now has a label like every other row; the label for
    # best_response was dropped because the `by` variable is not a table row.
    label = list(histology ~ "Histology",
                 stage ~ "Stage",
                 bulky ~ "Bulky",
                 b_symptom ~ "B symptoms",
                 mets ~ "Metastasis",
                 radiation ~ "Radiation",
                 best_response_after ~ "Number of cycles",
                 dead ~ "Patient died",
                 event ~ "Patient had event",
                 bmt ~ "Bone Marrow Transplantation")
  ) %>%
  add_p()   # add p values
Characteristic CR, N = 2721 PR, N = 131 p-value2
Histology >0.9
    HD 1 (0.4%) 0 (0%)
    Int 3 (1.2%) 0 (0%)
    LP 7 (2.9%) 0 (0%)
    LR 5 (2.0%) 0 (0%)
    MC 69 (28%) 4 (31%)
    NS 160 (65%) 9 (69%)
    Unknown 27 0
Stage 0.7
    I 20 (7.4%) 0 (0%)
    II 136 (50%) 6 (46%)
    III 35 (13%) 1 (7.7%)
    IIIS 25 (9.2%) 1 (7.7%)
    IV 52 (19%) 5 (38%)
    IVS 3 (1.1%) 0 (0%)
    Unknown 1 0
Bulky 101 (37%) 7 (54%) 0.3
B symptoms 123 (45%) 9 (69%) 0.090
mets 52 (19%) 5 (38%) 0.15
Radiation 51 (19%) 12 (92%) <0.001
Number of cycles 0.4
    2 182 (67%) 11 (85%)
    3 2 (0.7%) 0 (0%)
    4 86 (32%) 2 (15%)
    5 1 (0.4%) 0 (0%)
    Unknown 1 0
Patient died 6 (2.2%) 3 (23%) 0.005
Patient had event 26 (9.6%) 3 (23%) 0.13
Bone Marrow Transplantation 9 (3.3%) 2 (15%) 0.084
1 n (%)
2 Fisher's exact test; Pearson's Chi-squared test

Donut chart for stages and histologies

# Donut chart A: stage distribution, leaving out patients with a missing stage.
A <- ds %>%
  mutate(var = stage) %>%
  count(var) %>%
  filter(!is.na(var)) %>%
  donut_chart()

# Donut chart B: histology, keeping MC/NS/LP and grouping the rest as "Others".
# Missing histology is kept as NA so the filter below really removes it;
# previously NA failed the %in% test and was counted as "Others".
B <- ds %>%
  mutate(var = case_when(
    is.na(histology) ~ NA_character_,
    histology %in% c("MC", "NS", "LP") ~ as.character(histology),
    TRUE ~ "Others"
  )) %>%
  count(var) %>%
  filter(!is.na(var)) %>%
  donut_chart()

ggarrange(A, B, labels = "AUTO")

Histogram for age and histology

# Stacked histogram of age at diagnosis, coloured by histology group
# (MC, NS, LP, everything else as "Others"), least frequent group first.
# NOTE(review): rows with missing histology also fall into "Others" here,
# because NA fails the %in% test - confirm this is intended.
ds %>%
  mutate(
    histology = ifelse(histology %in% c("MC","NS", "LP"), histology, "Others"),
    histology_group = fct_rev(fct_infreq(histology))
  ) %>%
  ggplot(aes(x = age_at_dx, fill = histology_group)) +
  geom_histogram(bins = 17) +
  scale_x_continuous(breaks = 2:18) +
  scale_fill_brewer(palette = "Set1") +
  labs(x = "Age", y = "Frequency", fill = "Histology") +
  theme_classic2() +
  theme(legend.position = c(0.2,0.8))

Draw the curves of OS and EFS

# Kaplan-Meier fit for overall survival, using the OS and dead columns.
os <- survfit(Surv(OS, dead) ~ 1, data = ds)

# Stand-alone OS curve. Stored as `os_plot` rather than `OS` so the name stays
# free for the OS() helper that is called later in this file.
os_plot <- ggsurvplot(os, data = ds, legend = "none", conf.int = FALSE,
                      palette = "red", xlab = "Months", break.time.by = 12)

# Kaplan-Meier fit for event-free survival, using the EFS and event columns.
efs <- survfit(Surv(EFS, event) ~ 1, data = ds)

# Stand-alone EFS curve; named `efs_plot` for the same reason as above.
efs_plot <- ggsurvplot(efs, data = ds, legend = "none", conf.int = FALSE,
                       palette = "blue", xlab = "Months", break.time.by = 12)

# Both curves on one plot, limited to the first 60 months.
plots <- list(OS = os, EFS = efs)

ggsurvplot(plots, data = ds, combine = TRUE,
           palette = c("red", "blue"),
           break.time.by = 12, xlab = "Months",
           legend = c(0.2, 0.15),
           legend.title = "",
           legend.labs = c("Overall Survival", "Event Free Survival"),
           surv.scale = "percent", xlim = c(0, 60))

# compare outcome for those who had PR and CR

# NOTE(review): fix_date() and donut_chart() are called earlier in this file
# but are not defined in the visible code; presumably myScripts.R is also
# sourced before those calls - TODO confirm.
source("myScripts.R")  # this will upload all functions in myScripts
# OS() is not defined in the visible code (presumably it comes from
# myScripts.R). It is passed the data with `var` set to the grouping column
# and `os` set to OS divided by 12, plus a title string.
ds %>% mutate(var=best_response, os=OS/12) %>% OS("according to best response")

## according to best response
##                Characteristic       Time 1            Time 5       
##              ──────────────────────────────────────────────────────
##                Overall          98% (97%, 100%)   96% (94%, 99%)   
##                var                                                 
##                CR               98% (97%, 100%)   97% (95%, 100%)  
##                PR               92% (79%, 100%)   55% (27%, 100%)  
## 
## Column names: label, stat_1, stat_2
## P= 1.950382e-06
# Same comparison for event-free survival: `var` is the grouping column and
# `efs` is EFS divided by 12. EFS() is not defined in the visible code
# (presumably it comes from myScripts.R).
ds %>% mutate(var=best_response, efs=EFS/12) %>% EFS("according to best response")

## according to best response
##                Characteristic       Time 1            Time 5       
##              ──────────────────────────────────────────────────────
##                Overall          98% (97%, 100%)   88% (84%, 93%)   
##                var                                                 
##                CR               98% (97%, 100%)   89% (85%, 94%)   
##                PR               92% (79%, 100%)   55% (27%, 100%)  
## 
## Column names: label, stat_1, stat_2
## P= 0.01276748

Cox regression for survival

# Dichotomise the cycle count extracted from cr_timing: exactly 2 vs anything
# else (missing values stay NA).
ds$best_response_after_2cycles <- ifelse(ds$best_response_after == "2", "2 cycles", "More than 2")

# Covariates for the Cox model, as a plain character vector of column names.
explanatory <- c("histology", "stage", "bulky", "b_symptom", "mets",
                 "radiation", "best_response", "best_response_after_2cycles")

# all_of() marks `explanatory` as an external vector of column names; passing
# it bare to across() is deprecated and ambiguous.
ds <- ds %>% mutate(across(all_of(explanatory), as.factor))
# Use NS as the reference histology and lump all but the 4 most frequent
# levels into "Other".
ds <- ds %>% mutate(histology = fct_relevel(histology, "NS"),
                    histology = fct_lump(histology, 4))

dependent <- "Surv(OS, dead)"

# Univariable and multivariable Cox regression table.
# The result keeps its original name `t` (which masks base::t as a variable)
# in case later code refers to it.
t <- ds %>% finalfit(dependent, explanatory)
gt(t)
Dependent: Surv(OS, dead) all HR (univariable) HR (multivariable)
histology NS 169 (65.5) - -
LP 7 (2.7) 0.00 (0.00-Inf, p=0.999) 916398.33 (0.00-Inf, p=1.000)
LR 5 (1.9) 0.00 (0.00-Inf, p=0.999) 0.02 (0.00-Inf, p=1.000)
MC 73 (28.3) 0.49 (0.05-4.42, p=0.528) 6.33 (0.50-80.00, p=0.154)
Other 4 (1.6) 0.00 (0.00-Inf, p=0.999) 250848.07 (0.00-Inf, p=1.000)
stage I 20 (7.0) - -
II 142 (50.0) 85992615.44 (0.00-Inf, p=0.999) 0.77 (0.11-5.27, p=0.786)
III 36 (12.7) 1.00 (0.00-Inf, p=1.000) 0.00 (0.00-Inf, p=0.997)
IIIS 26 (9.2) 175449624.16 (0.00-Inf, p=0.999) 0.00 (0.00-Inf, p=0.997)
IV 57 (20.1) 365804748.26 (0.00-Inf, p=0.999) 0.08 (0.01-0.58, p=0.012)
IVS 3 (1.1) 0.99 (0.00-Inf, p=1.000) 0.00 (0.00-Inf, p=1.000)
bulky NO 177 (62.1) - -
YES 108 (37.9) 1.39 (0.37-5.16, p=0.627) 0.68 (0.10-4.64, p=0.692)
b_symptom NO 153 (53.7) - -
YES 132 (46.3) 4.16 (0.86-20.04, p=0.075) 37871418.21 (0.00-Inf, p=0.994)
mets NO 228 (80.0) - -
YES 57 (20.0) 3.19 (0.86-11.87, p=0.084) 6.72 (0.98-46.27, p=0.053)
radiation NO 222 (77.9) - -
YES 63 (22.1) 7.71 (1.93-30.86, p=0.004) 247965289.99 (0.00-Inf, p=0.995)
best_response CR 272 (95.4) - -
PR 13 (4.6) 13.61 (3.35-55.36, p<0.001) 5.46 (0.86-34.68, p=0.072)
best_response_after_2cycles 2 cycles 193 (68.0) - -
More than 2 91 (32.0) 0.25 (0.03-2.04, p=0.197) 0.14 (0.02-1.31, p=0.085)