Methods

lookupCategory<-read.csv("lookupCAtegory.csv")


ds<- read.csv("list19Apr23.csv") %>% 
  clean_names() %>% 
  mutate(across(where(is.character), stringr::str_to_upper)) %>%    
  mutate(across(where(is.character), stringr::str_squish))  %>%     
  mutate(date_of_death=if_else(date_of_death=="00/00/0000", "4/19/2023", date_of_death)) %>% 
  mutate(across(contains(c("date", "dte")), function(x) parse_date_time2(x, order=c("dBY" , "mdY", "dbY")))) %>% 
  mutate(dead=if_else(vital_status== "ALIVE(1)", 0, 1)) %>% 
  mutate(OS= as.numeric(difftime(date_of_death , date_of_diagnosis, units="days"))/30) %>% 
  mutate(date_of_birth=date_of_diagnosis-years(age_at_diagnosis)) %>% 
  mutate(nationality = ifelse(nationality=="JORDAN(01)", "Jordanian", "Nonjordanian")) %>% 
  mutate(site=str_extract(primary_site, "(?<=\\()([^)]+)(?=\\))")) %>% 
  mutate(histology = str_extract(histology_behavior_icd_o_3, "\\(\\d{4}"),
         histology = str_remove(histology, "\\(")) %>% 
  mutate(record_number=  str_extract(accession_seq, "(?<=/)\\d+")) %>% 
  mutate(year_of_diagnosis=year(date_of_diagnosis)) %>% 
  filter(year_of_diagnosis>=2010) 
  

library(stringr)

# https://seer.cancer.gov/ayarecode/aya-who2008.html       4/4/2021
aya <- read.csv("ayarecodewho2008.txt", sep=";")

aya<- aya[-98,]
for (l in 1:nrow(aya)) if (aya[l,1]=="") aya[l,1]<-aya[l-1,1]

split<-function(x){
  result<-c()
  t<-unlist(strsplit(x, split=","))
  for (b in t){
    sapply(str_extract_all(b, '[0-9.]+'), function(x) as.numeric(x)) ->a
    if (length(a)>1) a<-c(a[1,1]:a[2,1])
    result<-c(result, a)}
  return (result)
}

aya$site<-list(c())
for (l in 1:nrow(aya)) if (aya[l,3]!=" ") aya$site[[l]]<-split(aya[l,3])

aya$hist<-list(c())
for (l in 1:nrow(aya)) if (aya[l,4]!=" ") aya$hist[[l]]<-split(aya[l,4])


aya$category<-c("")
for (l in 1:nrow(aya)) if (!grepl("\\.", aya[l,1])) aya$category[l]=aya[l,1]
for (l in 1:nrow(aya)) if (aya[l,8]=="") aya[l,8]<-aya[l-1,8]
excludeEmpty<-which(aya$Primary.Site==" ")
aya %>% slice(-excludeEmpty)->aya

########################################
#  patients
########################################

removeParenthesis<-function(x)gsub("\\s*\\([^\\)]+\\)","",as.character(x)) 

ds$site<- removeParenthesis(ds$site) %>%  substr(start = 2, stop = 4) %>% as.numeric
ds$histology<- ds$histology %>% as.numeric


ds$aya<-c("")
ds$ayaCategory<-c("")

for (m in 1:nrow(ds)){
  site<- ds$site[m]
  histology<- ds$histology[m]
  for (n in 1:nrow(aya)){
    if (is.element(site, aya$site[[n]]) & is.element(histology, aya$hist[[n]])){
      ds$aya[m] <-aya$SEER.AYA.Site.Recode.WHO.2008.Definition.Site.Group[n]
      ds$ayaCategory[m]<-aya$category[n]
    }}}

ds$aya<-stringr::str_squish(ds$aya)

ds<- ds%>%  left_join(lookupCategory)

ds$category<- factor(ds$category, levels=c("Leukemia", "Lymphoma", "CNS", "Solid", "Bone", "STS"))

# ds<- ds %>% filter(aya!="", OS>=0) %>% mutate(Histology=as.character(Histology))
# 
# ds$OS <- if_else(ds$Age<18 & ds$Nationality=="Jordanian" & ds$year>2015 & ds$aya=="1.1 Acute lymphoid leukemia", ds$OS*2, ds$OS)


gender<- read_csv("cancer_registry_1Feb2024.csv") %>% janitor::clean_names() %>% 
  dplyr::select(gender=sex, medical_record_number) %>% 
  distinct(medical_record_number, .keep_all = T)


ds<- left_join(ds, gender) %>% filter(!is.na(gender)) %>%  filter(!is.na(category)) %>% 
  mutate(gender=recode(gender,
                       "1 (MALE)"="MALE",
                       "2 (FEMALE)"="FEMALE"))
vitals<- read.csv("Vitals.csv") %>% 
  clean_names()%>% 
  inner_join(ds[, c("mrn", "histology_behavior_icd_o_3", "date_of_birth", "date_of_diagnosis", "category", "ayaCategory", "aya","gender", "age_at_diagnosis", "OS", "dead")]) %>% 
  mutate(date=as.Date(mdy_hm(date_time_vitals_taken))) %>% 
  mutate(age=as.numeric(difftime(date, date_of_birth,  units="days"))/360) %>% 
  mutate(months=as.numeric(difftime(date,date_of_diagnosis,  units="days"))/30)


save(ds, file="ds.RData") 
save(vitals, file="vitals.RData")

Loading data

You will get these files ds.RData and vitals.RData
Just remove the hashtags# below and load the data to have 2 dataframes ds and vitals

# load("ds.RData")
# load("vitals.RData")
HeightWeight<- vitals %>% filter(vital_type %in% c("HEIGHT", "WEIGHT"))


HeightWeight %>% 
  filter(vital_type=="WEIGHT") %>% 
  mutate( rate=as.numeric(rate), rate=0.45359237*rate) %>% 
  mutate(wfs=sds(rate, age = age, sex = gender, male = "MALE", female = "FEMALE",
                 ref = turkish.ref  , item = "weight", type = "perc")) %>% 
  dplyr::select(weight=rate, date, age, months, wfs, mrn, category, gender, age_at_diagnosis, OS, dead) -> weight



HeightWeight %>% 
  filter(vital_type=="HEIGHT") %>% 
  mutate(rate=as.numeric(rate), rate=2.54*rate) %>% 
  mutate(hfs=sds(rate, age = age, sex = gender, male = "MALE", female = "FEMALE",
                 ref = turkish.ref  , item = "height", type = "perc")) %>% 
  dplyr::select(height=rate, date, age, months, hfs, mrn, category, gender, age_at_diagnosis, OS, dead) ->height


full_join(height, weight) %>% 
  mutate(bmi=weight/((0.01*height)^2)) ->BMI


BMI<-BMI %>% mutate(bfs=sds(bmi, age = age, sex = gender, male = "MALE", female = "FEMALE",
                 ref = turkish.ref  , item = "bmi_bundak", type = "perc")) %>% 
  dplyr::select( age, months,height, hfs, weight, wfs, bmi ,  bfs,  mrn, category, gender, age_at_diagnosis, OS, dead) %>% 
  filter(age>=0, months>=0)

BMI<- BMI %>% 
  mutate(BMI_Categories = case_when(
      bfs < 0.05 ~ "Underweight",
      bfs >= 0.05 & bfs < 0.85 ~ "Healthy weight",
      bfs >= 0.85 & bfs < 0.95 ~ "Overweight",
      bfs >= 0.95 ~ "Obese",
      TRUE ~ NA_character_)) %>%  
  mutate(Height_Categories = case_when(
      hfs < 0.05 ~ "Short",
      hfs >= 0.95 ~ "Tall",
      TRUE ~ "Normal")) %>% 
mutate(Weight_Categories = case_when(
      wfs < 0.05 ~ "Low weight",
      wfs >= 0.95 ~ "Increased Weight",
      TRUE ~ "Normal")) 

Donut chart showing disribution of patients

ds %>% 
  filter(mrn %in% unique(vitals$mrn)) %>% 
  mutate(var=category) %>% count(var) %>% 
  donut_chart()

Table showing charachterstics of patients

ds %>% 
  filter(mrn %in% unique(vitals$mrn)) %>% 
  mutate(year=year(date_of_diagnosis)) %>% 
  dplyr::select(c("age_at_diagnosis","gender", "year", "category", "ayaCategory")) %>% 
  tbl_summary()
Characteristic N = 3,7491
age_at_diagnosis 6 (2, 12)
gender
    FEMALE 1,619 (43%)
    MALE 2,130 (57%)
year 2,016.0 (2,014.0, 2,019.0)
category
    Leukemia 1,042 (28%)
    Lymphoma 619 (17%)
    CNS 560 (15%)
    Solid 1,065 (28%)
    Bone 250 (6.7%)
    STS 213 (5.7%)
ayaCategory
    1 Leukemias 1,042 (28%)
    10 Unspecified Malignant Neoplasms 10 (0.3%)
    2 Lymphomas 619 (17%)
    3 CNS and Other Intracranial and Intraspinal Neoplasms (all behaviors) 560 (15%)
    4 Osseous & Chondromatous Neoplasms 250 (6.7%)
    5 Soft Tissue Sarcomas 213 (5.7%)
    6 Germ Cell and Trophoblastic Neoplasms 100 (2.7%)
    7 Melanoma and Skin Carcinomas 15 (0.4%)
    8 Carcinomas 111 (3.0%)
    9 Miscellaneous specified neoplasms, NOS 829 (22%)
1 Median (IQR); n (%)

count of patients per year of diagnosis

ds %>% 
  mutate(year=as.character(year(date_of_diagnosis))) %>% count(year) %>% gt()
year n
2010 342
2011 384
2012 362
2013 373
2014 364
2015 384
2016 397
2017 383
2018 293
2019 336
2020 336
2021 362

median number of readings (height or weight) recorded per patient with 25 and 75% quantiles

HeightWeight %>% 
  group_by(mrn) %>% 
  summarise(n=n()) %>% 
  summarise(Total=sum(n),
            median=median(n),
    q25 = quantile(n, probs = 0.25, na.rm = TRUE),  # 25th percentile
    q75 = quantile(n, probs = 0.75, na.rm = TRUE)   # 75th percentile
  ) %>% gt()
Total median q25 q75
337589 66 26 130

Growth paramaters at diagnosis

Distribution lines of BMI, height and weight of children with cancer at diagnosis showing bimodal distribution of weight/BMI and mainly right-skewed curve for height

weight %>%  filter(!is.na(weight))%>%   arrange(months) %>%   group_by(mrn) %>% slice(1) -> weight_at_diagnosis
height %>%  filter(!is.na(height))%>%   arrange(months) %>%   group_by(mrn) %>% slice(1) -> height_at_diagnosis
BMI %>%     filter(!is.na(bmi))%>%      arrange(months) %>%   group_by(mrn) %>% slice(1) -> BMI_at_diagnosis



# Assuming BMI_at_diagnosis, weight_at_diagnosis, and height_at_diagnosis are your data frames
# Combine them while dropping duplicated columns

combined_df <- BMI_at_diagnosis %>%
  full_join(weight_at_diagnosis, by = intersect(names(BMI_at_diagnosis), names(weight_at_diagnosis))) %>%
  full_join(height_at_diagnosis, by = intersect(names(BMI_at_diagnosis), names(height_at_diagnosis)))

# Note: This approach joins the data frames on columns they have in common and avoids duplicating columns in the final data frame.

combined_df[, Cs(category, wfs, hfs, bfs)] %>% 
  tidyr::gather(metric, val, -category) %>% 
  ggplot(aes(x=val, fill=metric))+
  geom_density(alpha=0.3)+
  theme_classic()+
  scale_x_continuous(labels = scales::percent_format(scale = 100))+
  labs(x="Percentile for Age at Diagnosis", y="Distribution Density", fill="Metric", title="All patients combined")+
  scale_fill_discrete(labels=c("BMI", "Height", "Weight"))+
  theme(strip.background = element_blank(), axis.text.x = element_text(size=8))

combined_df[, Cs(category, wfs, hfs, bfs)] %>% 
  tidyr::gather(metric, val, -category) %>% 
  ggplot(aes(x=val, fill=metric))+
  geom_density(alpha=0.3)+
  theme_classic()+
  scale_x_continuous(labels = scales::percent_format(scale = 100))+
  labs(x="Percentile for Age at Diagnosis", y="Distribution Density", fill="Metric")+
  scale_fill_discrete(labels=c("BMI", "Height", "Weight"))+
  theme(strip.background = element_blank(), axis.text.x = element_text(size=8))+
  facet_wrap(.~category, scales="free")

growth parameters after 1 year

weight %>%  filter(!is.na(weight))%>% filter(months>=12) %>%   arrange(months) %>%   group_by(mrn) %>% slice(1) -> weight_after_1
height %>%  filter(!is.na(height))%>% filter(months>=12) %>% arrange(months) %>%   group_by(mrn) %>% slice(1) -> height_after_1
BMI %>%     filter(!is.na(bmi))%>%    filter(months>=12) %>% arrange(months) %>%   group_by(mrn) %>% slice(1) -> BMI_after_1



# Assuming BMI_after_1, weight_after_1, and height_after_1 are your data frames
# Combine them while dropping duplicated columns

combined_df <- BMI_after_1 %>%
  full_join(weight_after_1, by = intersect(names(BMI_after_1), names(weight_after_1))) %>%
  full_join(height_after_1, by = intersect(names(BMI_after_1), names(height_after_1)))

# Note: This approach j



combined_df[, Cs(category, wfs, hfs, bfs)] %>% 
  tidyr::gather(metric, val, -category) %>% 
  ggplot(aes(x=val, fill=metric))+
  geom_density(alpha=0.3)+
  theme_classic()+
  scale_x_continuous(labels = scales::percent_format(scale = 100))+
  labs(x="Percentile for Age at Diagnosis", y="Distribution Density", fill="Metric")+
  scale_fill_discrete(labels=c("BMI", "Height", "Weight"))+
  theme(strip.background = element_blank(), axis.text.x = element_text(size=8))+
  facet_wrap(.~category, scales="free")

First reading after 5 years of diagnosis

weight %>%  filter(!is.na(weight))%>% filter(months>=60) %>%   arrange(months) %>%   group_by(mrn) %>% slice(1) -> weight_after_5
height %>%  filter(!is.na(height))%>% filter(months>=60) %>% arrange(months) %>%   group_by(mrn) %>% slice(1) -> height_after_5
BMI %>%     filter(!is.na(bmi))%>%    filter(months>=60) %>% arrange(months) %>%   group_by(mrn) %>% slice(1) -> BMI_after_5



# Assuming BMI_after_5, weight_after_5, and height_after_5 are your data frames
# Combine them while dropping duplicated columns

combined_df <- BMI_after_5 %>%
  full_join(weight_after_5, by = intersect(names(BMI_after_5), names(weight_after_5))) %>%
  full_join(height_after_5, by = intersect(names(BMI_after_5), names(height_after_5)))




# Note: This approach j

combined_df[, Cs(category, wfs, hfs, bfs)] %>% 
  tidyr::gather(metric, val, -category) %>% 
  ggplot(aes(x=val, fill=metric))+
  geom_density(alpha=0.3)+
  theme_classic()+
  scale_x_continuous(labels = scales::percent_format(scale = 100))+
  labs(x="Percentile for Age at Diagnosis", y="Distribution Density", fill="Metric")+
  scale_fill_discrete(labels=c("BMI", "Height", "Weight"))+
  theme(strip.background = element_blank(), axis.text.x = element_text(size=8))+
  facet_wrap(.~category, scales="free")

table showing number of patients with abnormal values at diagnosis

BMI %>% 
  filter(!is.na(bmi)) %>% 
  arrange(months) %>% 
  group_by(mrn) %>% 
  slice(1) %>% 
  dplyr::select(category, BMI_Categories, Height_Categories, Weight_Categories ) %>% 
  tbl_summary(by=category) %>% add_overall()
Characteristic Overall, N = 3,6761 Leukemia, N = 1,0351 Lymphoma, N = 6111 CNS, N = 5401 Solid, N = 1,0421 Bone, N = 2421 STS, N = 2061
mrn 128,195 (89,131, 173,040) 131,089 (85,257, 176,207) 128,399 (91,172, 173,283) 129,276 (92,634, 173,207) 125,482 (88,878, 171,117) 128,709 (88,431, 169,525) 131,222 (94,242, 166,695)
BMI_Categories
    Healthy weight 1,186 (63%) 337 (65%) 295 (63%) 200 (65%) 171 (61%) 126 (58%) 57 (60%)
    Obese 260 (14%) 68 (13%) 69 (15%) 44 (14%) 38 (14%) 25 (11%) 16 (17%)
    Overweight 197 (10%) 57 (11%) 40 (8.5%) 37 (12%) 25 (9.0%) 30 (14%) 8 (8.4%)
    Underweight 245 (13%) 53 (10%) 67 (14%) 28 (9.1%) 45 (16%) 38 (17%) 14 (15%)
    Unknown 1,788 520 140 231 763 23 111
Height_Categories
    Normal 3,297 (90%) 931 (90%) 518 (85%) 464 (86%) 989 (95%) 210 (87%) 185 (90%)
    Short 263 (7.2%) 68 (6.6%) 67 (11%) 51 (9.4%) 37 (3.6%) 25 (10%) 15 (7.3%)
    Tall 116 (3.2%) 36 (3.5%) 26 (4.3%) 25 (4.6%) 16 (1.5%) 7 (2.9%) 6 (2.9%)
Weight_Categories
    Increased Weight 295 (8.0%) 80 (7.7%) 78 (13%) 51 (9.4%) 39 (3.7%) 28 (12%) 19 (9.2%)
    Low weight 246 (6.7%) 46 (4.4%) 66 (11%) 39 (7.2%) 42 (4.0%) 37 (15%) 16 (7.8%)
    Normal 3,135 (85%) 909 (88%) 467 (76%) 450 (83%) 961 (92%) 177 (73%) 171 (83%)
1 Median (IQR); n (%)

after 1 year

BMI %>% 
  filter(!is.na(bmi)) %>%
  filter(months>=12) %>% 
  arrange(months) %>% 
  group_by(mrn) %>% 
  slice(1) %>% 
  ungroup() %>% 
  dplyr::select(category, BMI_Categories, Height_Categories, Weight_Categories ) %>% 
  tbl_summary(by=category) %>% add_overall()
Characteristic Overall, N = 2,9161 Leukemia, N = 8911 Lymphoma, N = 5271 CNS, N = 3721 Solid, N = 7781 Bone, N = 1881 STS, N = 1601
BMI_Categories
    Healthy weight 961 (62%) 269 (58%) 255 (62%) 164 (68%) 127 (60%) 91 (58%) 55 (77%)
    Obese 246 (16%) 93 (20%) 71 (17%) 24 (10%) 28 (13%) 24 (15%) 6 (8.5%)
    Overweight 210 (14%) 74 (16%) 55 (13%) 30 (13%) 27 (13%) 20 (13%) 4 (5.6%)
    Underweight 137 (8.8%) 30 (6.4%) 28 (6.8%) 22 (9.2%) 28 (13%) 23 (15%) 6 (8.5%)
    Unknown 1,362 425 118 132 568 30 89
Height_Categories
    Normal 2,607 (89%) 809 (91%) 447 (85%) 308 (83%) 728 (94%) 167 (89%) 148 (93%)
    Short 248 (8.5%) 64 (7.2%) 62 (12%) 53 (14%) 39 (5.0%) 20 (11%) 10 (6.3%)
    Tall 61 (2.1%) 18 (2.0%) 18 (3.4%) 11 (3.0%) 11 (1.4%) 1 (0.5%) 2 (1.3%)
Weight_Categories
    Increased Weight 264 (9.1%) 96 (11%) 82 (16%) 28 (7.5%) 30 (3.9%) 23 (12%) 5 (3.1%)
    Low weight 175 (6.0%) 39 (4.4%) 29 (5.5%) 34 (9.1%) 37 (4.8%) 26 (14%) 10 (6.3%)
    Normal 2,477 (85%) 756 (85%) 416 (79%) 310 (83%) 711 (91%) 139 (74%) 145 (91%)
1 n (%)

after 5 year

BMI%>% 
  filter(!is.na(bmi)) %>%
  filter(months>=60) %>% 
  arrange(months) %>% 
  group_by(mrn) %>% 
  slice(1) %>% 
  ungroup() %>% 
  dplyr::select(category, BMI_Categories, Height_Categories, Weight_Categories ) %>% 
  tbl_summary(by=category) %>% add_overall()
Characteristic Overall, N = 1,0961 Leukemia, N = 3901 Lymphoma, N = 1851 CNS, N = 1731 Solid, N = 2501 Bone, N = 581 STS, N = 401
BMI_Categories
    Healthy weight 491 (61%) 195 (60%) 62 (60%) 96 (66%) 99 (62%) 22 (63%) 17 (55%)
    Obese 158 (20%) 72 (22%) 21 (20%) 25 (17%) 26 (16%) 8 (23%) 6 (19%)
    Overweight 103 (13%) 48 (15%) 12 (12%) 14 (9.6%) 20 (13%) 2 (5.7%) 7 (23%)
    Underweight 48 (6.0%) 10 (3.1%) 8 (7.8%) 11 (7.5%) 15 (9.4%) 3 (8.6%) 1 (3.2%)
    Unknown 296 65 82 27 90 23 9
Height_Categories
    Normal 908 (83%) 334 (86%) 163 (88%) 122 (71%) 206 (82%) 51 (88%) 32 (80%)
    Short 141 (13%) 37 (9.5%) 17 (9.2%) 45 (26%) 29 (12%) 7 (12%) 6 (15%)
    Tall 47 (4.3%) 19 (4.9%) 5 (2.7%) 6 (3.5%) 15 (6.0%) 0 (0%) 2 (5.0%)
Weight_Categories
    Increased Weight 156 (14%) 71 (18%) 21 (11%) 22 (13%) 29 (12%) 7 (12%) 6 (15%)
    Low weight 79 (7.2%) 15 (3.8%) 12 (6.5%) 27 (16%) 17 (6.8%) 5 (8.6%) 3 (7.5%)
    Normal 861 (79%) 304 (78%) 152 (82%) 124 (72%) 204 (82%) 46 (79%) 31 (78%)
1 n (%)

BMI progress over time

I made new figures … look after these 2 bar charts

BMI %>% 
  mutate(BMI_Categories=factor(BMI_Categories, levels=c("Underweight", "Healthy weight", "Overweight", "Obese"))) %>% 
  mutate(year=cut(months, breaks=seq(0,120,12), labels=1:10)) %>% 
  filter(!is.na(BMI_Categories), !is.na(year)) %>% 
  ggplot(aes(x=year, fill=BMI_Categories))+
  geom_bar(position="fill")+
  # scale_x_continuous(limits = c(0, 180), breaks = seq(0, 180, 12), labels=seq(0,15,1))+
  theme_classic()+
  labs(x="Years after diagnosis", y="Proportion", fill="Weight categories")

BMI %>% 
  mutate(BMI_Categories=factor(BMI_Categories, levels=c("Underweight", "Healthy weight", "Overweight", "Obese"))) %>% 
  mutate(year=cut(months, breaks=seq(0,120,12), labels=1:10)) %>% 
  filter(!is.na(BMI_Categories), !is.na(year)) %>% 
  ggplot(aes(x=year, fill=BMI_Categories))+
  geom_bar(position="fill")+
  # scale_x_continuous(limits = c(0, 180), breaks = seq(0, 180, 12), labels=seq(0,15,1))+
  theme_classic()+
  labs(x="Years after diagnosis", y="Proportion", fill="Weight categories")+
  facet_wrap(~category)

BMI progress over time shown using regression lines

BMI %>% 
  mutate(BMI_Categories=factor(BMI_Categories, levels=c("Underweight", "Healthy weight", "Overweight", "Obese"))) %>% 
  mutate(year=cut(months, breaks=seq(0,120,12), labels=1:10)) %>% 
  filter(!is.na(BMI_Categories), !is.na(year)) %>%
  group_by(year, mrn) %>% 
  summarise(bfs=mean(bfs)) %>% 
  mutate(BMI_Categories = case_when(
      bfs < 0.05 ~ "Underweight",
      bfs >= 0.05 & bfs < 0.85 ~ "Healthy weight",
      bfs >= 0.85 & bfs < 0.95 ~ "Overweight",
      bfs >= 0.95 ~ "Obese",
      TRUE ~ NA_character_)) %>% 
  mutate(BMI_Categories=factor(BMI_Categories, levels=c("Healthy weight", "Obese", "Overweight", "Underweight"))) %>% 
  group_by(year) %>% 
  count(BMI_Categories) %>% 
  mutate(total=sum(n), prcnt=n/total) %>% 
  ungroup() %>% 
  ggplot(aes(x=year, color=BMI_Categories, group=BMI_Categories, y=prcnt))+
  geom_point(show.legend = FALSE)+
  stat_smooth(method = "lm", formula='y ~ x', se = T)+
  stat_cor(aes(color = BMI_Categories), method = "spearman", label.x=3.5, label.y=c(0.42,0.38,0.335 ,0.29), )+
  # scale_x_continuous(limits = c(0, 180), breaks = seq(0, 180, 12), labels=seq(0,15,1))+
  theme_classic()+
  labs(x="Years after diagnosis", y="Proportion", fill="Weight categories")+
  theme(legend.position = c(0.2,0.5))

figure below is just to get the R and p values

BMI %>% 
  mutate(BMI_Categories=factor(BMI_Categories, levels=c("Underweight", "Healthy weight", "Overweight", "Obese"))) %>% 
  mutate(year=cut(months, breaks=seq(0,120,12), labels=1:10)) %>% 
  filter(!is.na(BMI_Categories), !is.na(year)) %>%
  group_by(category, year, mrn) %>% 
  summarise(bfs=mean(bfs)) %>% 
  mutate(BMI_Categories = case_when(
      bfs < 0.05 ~ "Underweight",
      bfs >= 0.05 & bfs < 0.85 ~ "Healthy weight",
      bfs >= 0.85 & bfs < 0.95 ~ "Overweight",
      bfs >= 0.95 ~ "Obese",
      TRUE ~ NA_character_)) %>% 
  mutate(BMI_Categories=factor(BMI_Categories, levels=c("Healthy weight", "Obese", "Overweight", "Underweight"))) %>% 
  group_by(category, year) %>% 
  count(BMI_Categories) %>% 
  mutate(total=sum(n), prcnt=n/total) %>% 
  ungroup() %>% 
  ggplot(aes(x=year, color=BMI_Categories, group=BMI_Categories, y=prcnt))+
  geom_point(show.legend = FALSE)+
  stat_smooth(method = "lm", formula='y ~ x', se = T)+
  stat_cor(aes(color = BMI_Categories), method = "spearman", label.x=3.5 )+
  # scale_x_continuous(limits = c(0, 180), breaks = seq(0, 180, 12), labels=seq(0,15,1))+
  theme_classic()+
  labs(x="Years after diagnosis", y="Proportion", fill="Weight categories")+
  theme(legend.position = "none")+
  facet_wrap(~category)

compare first BMI and BMI after 5 years

BMI %>%     filter(!is.na(bmi))%>%      arrange(months) %>%   group_by(mrn) %>% slice(1) %>% mutate(time="Dx") -> BMI_at_diagnosis
BMI %>%     filter(!is.na(bmi))%>%  filter(months>=60) %>%   arrange(months) %>%   group_by(mrn) %>% slice(1)  %>% 
  mutate(time="5-years")  -> BMI_at_5
BMI %>%     filter(!is.na(bmi))%>%  filter(months>=120) %>%   arrange(months) %>%   group_by(mrn) %>% slice(1)  %>% mutate(time="10-years") -> BMI_at_10

combined_df<-dplyr::bind_rows(BMI_at_diagnosis,BMI_at_5,BMI_at_10)


combined_df %>%
  filter(bmi<80) %>% 
  dplyr::select(time, bmi, category, mrn) %>% 
  mutate(time=factor(time, levels=c("Dx", "5-years", "10-years"))) %>% 
  ggplot(aes(x = time, y = bmi, group = mrn)) +
  geom_line(alpha=0.5, color = "grey") +
  geom_point(size=0.1)+
  theme_classic()+
  facet_wrap(~category)+
  labs(y="BMI", x="Timing of Readings")

  theme(strip.background = element_blank())
## List of 1
##  $ strip.background: list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

paired t test

compare Dx and 5 years

To compare BMI at diagnosis and at 5 years for 1096 patients with both readings available, a paired t test showed statistically significant difference in the mean with mean difference of 3.3 (95% CI, 1.59-4.97, p<0.001)

a<-  combined_df %>% filter(time!="10-years") %>% filter(duplicated(mrn) | duplicated(mrn, fromLast = TRUE))

  
t.test(bmi~time, data=a  , paired=T)
## 
##  Paired t-test
## 
## data:  bmi by time
## t = 3.8192, df = 1095, p-value = 0.0001414
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  1.596831 4.971208
## sample estimates:
## mean difference 
##        3.284019

compare 5 and 10 years

To compare BMI at diagnosis and at 5 years for 179 patients with both readings available, a paired t test showed statistically significant difference in the mean with mean difference of 3.2 (95% CI, 2.63-3.76, p<0.001)

a<-  combined_df %>% filter(time!="Dx") %>% filter(duplicated(mrn) | duplicated(mrn, fromLast = TRUE))

  
t.test(bmi~time, data=a  , paired=T)
## 
##  Paired t-test
## 
## data:  bmi by time
## t = 11.108, df = 178, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  2.626559 3.761433
## sample estimates:
## mean difference 
##        3.193996

was there any correlation over time for BMI

Do not include these 2 curves

  • Spearman correlation
sp <- ggscatter(BMI %>% filter(bmi<80, months<120), x = "months", y = "bmi",
   palette = "gray", alpha=0.01, size=1,
   add = "reg.line", conf.int = TRUE)
sp + stat_cor(method = "spearman", label.x = 80)

sp <- ggscatter(BMI %>% filter(bmi<80, months<120), x = "months", y = "bmi",
   color = "category", palette = "jco", alpha=0.03, size=1,
   add = "reg.line", conf.int = TRUE)
sp + stat_cor(aes(color = category), method = "spearman", label.x = 80)

predictors for obesity at 5 years using logistic regression

This logistic regression analysis aimed to identify predictors of obesity five years following the diagnosis of childhood cancer, considering factors such as gender, age at diagnosis, cancer category, and initial BMI categories. The findings indicated that age at diagnosis and initial BMI category were significant predictors of later obesity. Specifically, younger children at the time of cancer diagnosis were more likely to become obese, with a notable decrease in the odds of obesity for each additional year of age at diagnosis (multivariable OR = 0.78, p < 0.001). Furthermore, children categorized as overweight or obese at diagnosis had significantly higher odds of being obese five years later (multivariable ORs = 4.34 and 8.91, respectively, p < 0.001 for both), highlighting the strong predictive value of initial BMI status. In contrast, gender and specific cancer categories did not maintain significant associations with obesity in the multivariable analysis, indicating that these factors may be less critical in predicting long-term obesity outcomes in this population.

BMI_at_5 %>% filter(bfs>0.95) %>%  pull(mrn)->ObeseMRN
BMI_at_diagnosis$Obesity<-ifelse(BMI_at_diagnosis$mrn %in% ObeseMRN, "YES", "NO")
BMI_at_diagnosis$Obesity<- as.factor(BMI_at_diagnosis$Obesity)
BMI_at_diagnosis$category<- forcats::fct_relevel(BMI_at_diagnosis$category, "Solid")
dependent = "Obesity"
explanatory=c("gender", "age_at_diagnosis", "category", "BMI_Categories")
explanatory_multi=c("age_at_diagnosis", "category", "BMI_Categories")
BMI_at_diagnosis$gender=factor(BMI_at_diagnosis$gender, levels=c("MALE", "FEMALE"))
BMI_at_diagnosis %>% finalfit(dependent, explanatory, explanatory_multi) %>% gt()
Dependent: Obesity NO YES OR (univariable) OR (multivariable)
gender MALE 1991 (95.4) 95 (4.6) - -
FEMALE 1527 (96.0) 63 (4.0) 0.86 (0.62-1.19, p=0.381) -
age_at_diagnosis Mean (SD) 7.1 (5.5) 5.1 (3.4) 0.93 (0.90-0.96, p<0.001) 0.78 (0.72-0.84, p<0.001)
category Solid 1015 (97.4) 27 (2.6) - -
Leukemia 963 (93.0) 72 (7.0) 2.81 (1.81-4.48, p<0.001) 1.48 (0.68-3.50, p=0.340)
Lymphoma 590 (96.6) 21 (3.4) 1.34 (0.74-2.38, p=0.324) 1.22 (0.51-3.11, p=0.661)
CNS 515 (95.4) 25 (4.6) 1.82 (1.04-3.18, p=0.033) 0.78 (0.30-2.05, p=0.607)
Bone 234 (96.7) 8 (3.3) 1.29 (0.54-2.74, p=0.540) 1.03 (0.30-3.21, p=0.966)
STS 201 (97.6) 5 (2.4) 0.94 (0.31-2.26, p=0.892) 1.13 (0.23-4.19, p=0.868)
BMI_Categories Healthy weight 1162 (98.0) 24 (2.0) - -
Obese 231 (88.8) 29 (11.2) 6.08 (3.48-10.71, p<0.001) 8.91 (4.95-16.24, p<0.001)
Overweight 180 (91.4) 17 (8.6) 4.57 (2.37-8.63, p<0.001) 4.34 (2.21-8.36, p<0.001)
Underweight 245 (100.0) 0.00 (0.00-41526.24, p=0.982) 0.00 (0.00-13494.57, p=0.981)

predictors for short stature at 5 years using logistic regression

BMI_at_5 %>% filter(hfs<0.05) %>% pull(mrn)->ShortMRN
BMI_at_diagnosis$short<-ifelse(BMI_at_diagnosis$mrn %in% ShortMRN, "YES", "NO")
BMI_at_diagnosis$short<- as.factor(BMI_at_diagnosis$short)
BMI_at_diagnosis$category<- forcats::fct_relevel(BMI_at_diagnosis$category, "Solid")
BMI_at_diagnosis$gender=factor(BMI_at_diagnosis$gender, levels=c("MALE", "FEMALE"))
dependent = "short"
explanatory=c("gender", "age_at_diagnosis", "category", "Height_Categories")
BMI_at_diagnosis %>% finalfit(dependent, explanatory) %>% gt()
Dependent: short NO YES OR (univariable) OR (multivariable)
gender MALE 2021 (96.9) 65 (3.1) - -
FEMALE 1514 (95.2) 76 (4.8) 1.56 (1.11-2.19, p=0.010) 1.55 (1.10-2.21, p=0.013)
age_at_diagnosis Mean (SD) 7.1 (5.5) 6.7 (3.3) 0.99 (0.96-1.02, p=0.415) 0.92 (0.89-0.96, p<0.001)
category Solid 1013 (97.2) 29 (2.8) - -
Leukemia 996 (96.2) 39 (3.8) 1.37 (0.84-2.25, p=0.209) 1.48 (0.89-2.46, p=0.130)
Lymphoma 594 (97.2) 17 (2.8) 1.00 (0.53-1.81, p=0.999) 1.14 (0.58-2.21, p=0.694)
CNS 498 (92.2) 42 (7.8) 2.95 (1.82-4.83, p<0.001) 3.05 (1.84-5.12, p<0.001)
Bone 235 (97.1) 7 (2.9) 1.04 (0.42-2.27, p=0.926) 1.26 (0.48-2.98, p=0.612)
STS 199 (96.6) 7 (3.4) 1.23 (0.49-2.69, p=0.630) 1.35 (0.53-3.02, p=0.495)
Height_Categories Normal 3201 (97.1) 96 (2.9) - -
Short 218 (82.9) 45 (17.1) 6.88 (4.67-10.01, p<0.001) 10.01 (6.34-15.83, p<0.001)
Tall 116 (100.0) 0.00 (0.00-0.00, p=0.969) 0.00 (0.00-0.00, p=0.969)

growth parameter after 1 year of diagnosis per age

lower and upper values at diagnosis

MALES

# percentiles 1-5 year after diagnosis
BMI %>%
  filter(age>=0, gender=="MALE") %>% 
  arrange(months) %>% 
  group_by(mrn) %>% slice(1) %>% 
  mutate(ageCategory=cut(age, breaks=c(0:30), labels=seq(0,29,1), right=F)) %>% 
  mutate(yearsAfterDiagnosis=months/12) %>% 
  mutate(yearsCategory=cut(yearsAfterDiagnosis, breaks=c(0:15), labels=c(1:15))) %>% 
  group_by(ageCategory) %>% 
  summarise(Patients=n_distinct(mrn), readings=n(),
            lowerWeightPercentile=quantile(weight, 0.10,na.rm=T), upperWeightPercentile=quantile(weight, 0.90,na.rm=T),
            lowerHeightPercentile=quantile(height, 0.10,na.rm=T), upperHeightPercentile=quantile(height, 0.90,na.rm=T),
            lowerBMIPercentile=quantile(bmi, 0.10,na.rm=T), upperBMIPercentile=quantile(bmi, 0.90,na.rm=T))
ageCategoryPatientsreadingslowerWeightPercentileupperWeightPercentilelowerHeightPercentileupperHeightPercentilelowerBMIPercentileupperBMIPercentile
01441445.611  55.778.314.720.7
11701709  13.775.188  14.619.6
217317311.716.783  99  14.718.7
316316312.919.292  108  14  18.3
417017015  22  99  113  14.117.9
515415416  26.9104  120  13.718.8
611611617.727.5110  125  13.618.3
7969619  32.2114  131  13.619.5
8808021.432.3121  138  13.619.7
9979724.144.1125  143  14.222.6
10838326  52.3128  147  14.324.1
11868628.759.4135  157  14.927.3
12838330.963.7141  164  15.125.7
13888833.576  143  168  15.128.6
14727238.481.8152  178  16  28.1
1510910943.886  158  178  16.830.6
16777746.393.6161  178  17.530.9
1710910951.696.2163  179  18.331.4
18212151.991.5163  177  18.828.9
197751.474  169  181  17.523.4
207749.286.3169  179  20.627.8
212263  63  170  170  21.821.8
223379  87.8176  177  25.828.4
231159  59  173  173  19.719.7

FEMALES

# percentiles 1-5 year after diagnosis
BMI %>%
  filter(age>=0, gender=="FEMALE") %>% 
  arrange(months) %>% 
  group_by(mrn) %>% slice(1) %>% 
  mutate(ageCategory=cut(age, breaks=c(0:30), labels=seq(0,29,1), right=F)) %>% 
  mutate(yearsAfterDiagnosis=months/12) %>% 
  mutate(yearsCategory=cut(yearsAfterDiagnosis, breaks=c(0:15), labels=c(1:15))) %>% 
  group_by(ageCategory) %>% 
  summarise(Patients=n_distinct(mrn), readings=n(),
            lowerWeightPercentile=quantile(weight, 0.10,na.rm=T), upperWeightPercentile=quantile(weight, 0.90,na.rm=T),
            lowerHeightPercentile=quantile(height, 0.10,na.rm=T), upperHeightPercentile=quantile(height, 0.90,na.rm=T),
            lowerBMIPercentile=quantile(bmi, 0.10,na.rm=T), upperBMIPercentile=quantile(bmi, 0.90,na.rm=T))
ageCategoryPatientsreadingslowerWeightPercentileupperWeightPercentilelowerHeightPercentileupperHeightPercentilelowerBMIPercentileupperBMIPercentile
01201204.3910  50  76  14.219.8
11341348.9113.472.987.115  19.3
214814811   15.984  97.514.218.1
312612612.6 18.191  104  13.418.6
413213213.5 20.198  109  13.618.2
5969615.1 23.7103  118  13.118  
6757516.8 26.9106  124  14.118.5
7848418.9 31.6116  129  13.420.1
8535320.8 34.1119  138  13.619  
9646424   44.8125  143  13.623.4
10616125   50  129  150  14.524.6
11585826.9 58.9131  155  14.324.5
12727233.7 60.1143  162  15  25.5
13707036.9 59.7146  163  15.924.3
14616139.5 74.2146  164  17.628.3
15686844.6 82.5151  167  17.830  
16717141.8 70  150  167  17.827.7
17787842.3 81.8152  168  17.529.2
18181847.3 88.7156  171  18.633.2
197747.8 74  157  166  19.227.4
204455.4 112  164  169  22.841.9
216650   76.5157  166  19.727.7
221149   49  161  161  18.918.9
242245.9 61.1163  167  16.622.9
261158   58  153  153  24.824.8

lower and upper values at1-5 years afte diagnosis

MALES

# percentiles 1-5 year after diagnosis
BMI %>%
  filter(age>=0, months>=12, months<48, gender=="MALE") %>% 
  mutate(ageCategory=cut(age, breaks=c(0:30), labels=seq(0,29,1), right=F)) %>% 
  mutate(yearsAfterDiagnosis=months/12) %>% 
  mutate(yearsCategory=cut(yearsAfterDiagnosis, breaks=c(0:15), labels=c(1:15))) %>% 
  group_by(ageCategory) %>% 
  summarise(Patients=n_distinct(mrn), readings=n(),
            lowerWeightPercentile=quantile(weight, 0.10,na.rm=T), upperWeightPercentile=quantile(weight, 0.90,na.rm=T),
            lowerHeightPercentile=quantile(height, 0.10,na.rm=T), upperHeightPercentile=quantile(height, 0.90,na.rm=T),
            lowerBMIPercentile=quantile(bmi, 0.10,na.rm=T), upperBMIPercentile=quantile(bmi, 0.90,na.rm=T))
ageCategoryPatientsreadingslowerWeightPercentileupperWeightPercentilelowerHeightPercentileupperHeightPercentilelowerBMIPercentileupperBMIPercentile
111715159  14.476  9115.120  
2208276111.718.784  10015.320.2
3303351713.520.891.510715.119.5
4305387415.523.899  11414.619.3
5304381016.926.5104  12114.519.4
6266276018.230.2110  12814.419.2
7228269020.135.5115  13214.621  
8219254922  41.1118  13814.823.2
9200214423  46  122  14314.424.1
10171158126  48  128  14814.923.6
11161143027  56.3132  15315.225  
12155194528.969.5136  15815.928.5
13154210834.380  143  16716.430.5
14160190440.696  151  17316.833.2
15170173942  91.8156  17517  30.7
16184211046.690  159  17617.630.8
17183207848.697  160  17717.433.2
18188245752.598.3161  17918.331.9
19127177755  100  163  17718.733.4
207578351  97.2165  17818.731.6
212717957  81.8164  18019.428.1

FEMALES

# percentiles 1-5 year after diagnosis
BMI %>%
  filter(age>=0, months>=12, months<48, gender=="FEMALE") %>% 
  mutate(ageCategory=cut(age, breaks=c(0:30), labels=seq(0,29,1), right=F)) %>% 
  mutate(yearsAfterDiagnosis=months/12) %>% 
  mutate(yearsCategory=cut(yearsAfterDiagnosis, breaks=c(0:15), labels=c(1:15))) %>% 
  group_by(ageCategory) %>% 
  summarise(Patients=n_distinct(mrn), readings=n(),
            lowerWeightPercentile=quantile(weight, 0.10,na.rm=T), upperWeightPercentile=quantile(weight, 0.90,na.rm=T),
            lowerHeightPercentile=quantile(height, 0.10,na.rm=T), upperHeightPercentile=quantile(height, 0.90,na.rm=T),
            lowerBMIPercentile=quantile(bmi, 0.10,na.rm=T), upperBMIPercentile=quantile(bmi, 0.90,na.rm=T))
ageCategoryPatientsreadingslowerWeightPercentileupperWeightPercentilelowerHeightPercentileupperHeightPercentilelowerBMIPercentileupperBMIPercentile
11029369.514.976  91  15.219.1
2175178711.617.885  99.115.119.4
3247270913.319.491.1105  14.719.1
4229266414.923  97  113  14.419.1
5240314616.525.2104  118  14.119.2
6204237418  28.8108  124  14.219.8
7171192320.135  114  130  14.421.4
8141162020.838.7119  136  14  22.2
9119128224  41.1125  140  15  21.8
10120129627  46  128  145  15.124.3
11119119931.556.3132  152  15.728  
12117119727  60.5133  159  15.326  
13128115736.866  145  164  16.426.5
14131116938.765  147  163  16.926.4
15134146140.569  149  165  17.727.2
16125189539.671.5149  165  16.828.3
17141133146  81.3151  166  18.732.3
18151137845.682  153  166  17.932.4
19105103448  85.6153  167  18.632.9
206051347  121  154  168  18.235.5
21218548.181.5153  166  20  31.5

lower and upper values after 5 years of diagnosis

# percentiles 5 and more years after diagnoss
BMI %>%
  filter(age>=5, age<20, months>=60) %>% 
  mutate(ageCategory=cut(age, breaks=c(0:30), labels=seq(0,29,1), right=F)) %>% 
  mutate(yearsAfterDiagnosis=months/12) %>% 
  mutate(yearsCategory=cut(yearsAfterDiagnosis, breaks=c(0:15), labels=c(1:15))) %>% 
  group_by(ageCategory) %>% 
  summarise(Patients=n_distinct(mrn), readings=n(),
            lowerWeightPercentile=quantile(weight, 0.10,na.rm=T), upperWeightPercentile=quantile(weight, 0.90,na.rm=T),
            lowerHeightPercentile=quantile(height, 0.10,na.rm=T), upperHeightPercentile=quantile(height, 0.90,na.rm=T),
            lowerBMIPercentile=quantile(bmi, 0.10,na.rm=T), upperBMIPercentile=quantile(bmi, 0.90,na.rm=T))
ageCategoryPatientsreadingslowerWeightPercentileupperWeightPercentilelowerHeightPercentileupperHeightPercentilelowerBMIPercentileupperBMIPercentile
59362718  24.610612214.419.2
615478319.231.911112514.321.6
7230124120  41  11513214.125.5
8270127222  44.611813814.724.7
9271100724  44.912514215.123.9
10277125125.754.312715014.826.8
11257132129.371.613115415.529  
12248127331.364  13615716  28.7
1321594534  70.414016216.628.3
1418463738  74.513816617.129.8
1515581943  77.814716917.630.4
1614371544.585.414917518.232  
1714761448  109  14817618.432.8
1813454550.989.515517819.433.7
1913664040.184  15517716.429.2

Growth Chart

Cox regression for prediction of mortality using BMI

explanatory=c("gender", "age_at_diagnosis", "category", "BMI_Categories", "Weight_Categories", "Height_Categories")
explanatory_multi=c("age_at_diagnosis", "category")
dependent = "Surv(OS, dead)"

BMI_at_diagnosis %>% finalfit(dependent, explanatory, explanatory_multi) %>% gt()
Dependent: Surv(OS, dead) all HR (univariable) HR (multivariable)
gender MALE 2086 (56.7) - -
FEMALE 1590 (43.3) 0.99 (0.85-1.15, p=0.889) -
age_at_diagnosis Mean (SD) 7.0 (5.4) 1.02 (1.00-1.03, p=0.009) 1.02 (1.01-1.04, p=0.002)
category Solid 1042 (28.3) - -
Leukemia 1035 (28.2) 1.11 (0.90-1.37, p=0.325) 1.06 (0.85-1.31, p=0.613)
Lymphoma 611 (16.6) 0.40 (0.29-0.57, p<0.001) 0.35 (0.24-0.49, p<0.001)
CNS 540 (14.7) 2.01 (1.62-2.50, p<0.001) 1.90 (1.52-2.37, p<0.001)
Bone 242 (6.6) 2.34 (1.80-3.05, p<0.001) 1.96 (1.48-2.61, p<0.001)
STS 206 (5.6) 2.26 (1.70-3.01, p<0.001) 2.11 (1.58-2.81, p<0.001)
BMI_Categories Healthy weight 1186 (62.8) - -
Obese 260 (13.8) 0.82 (0.59-1.14, p=0.234) -
Overweight 197 (10.4) 1.02 (0.73-1.44, p=0.888) -
Underweight 245 (13.0) 1.20 (0.90-1.61, p=0.215) -
Weight_Categories Increased Weight 295 (8.0) - -
Low weight 246 (6.7) 1.26 (0.87-1.83, p=0.220) -
Normal 3135 (85.3) 0.99 (0.75-1.31, p=0.946) -
Height_Categories Normal 3297 (89.7) - -
Short 263 (7.2) 1.22 (0.93-1.59, p=0.149) -
Tall 116 (3.2) 1.04 (0.68-1.59, p=0.859) -