KHCC cancer registry was quieried for all patients
Vitals for these patients were downloaded from our Vista software using fileman
The 2 datasets were merged and used for downstream analysis
R version 4.3.2 was used
Childsds package was used – link to this package: https://cran.r-project.org/web/packages/childsds/index.html – Reference: use website link as a referene and list the 3 references below to refer to Turkish growth charts
We calculated the time in months since diagnosis for the measurement and cut this into cateogries. Weight and Height per year after diagnosis were averaged for all reading during the year
The first reading of height and weight was used as “Diagnosis values”. First readings after 1 and 5 years following diagnosis were used.
logistic regression was used to build a prediction model for the chances of having obesity and short stature at 5 years.
To find the common values of our patients we used an arbitrary value of 10% and 90%. WE did not perform parametric analysis with standard deviation and z scores as data was not normally distributed.
BMI definitions: <5% underweight, 5-85% health, 85-95% overweight >95% obeses
Hatipoglu, N. et al. Waist circumference percentiles for 7- to 17-year-old Turkish children and adolescents. Eur J Pediatr 167, 383–389 (2008);Bundak, R. et al. Body mass index references for Turkish children. Acta Paediatrica 95, 194–198 (2006).
Neyzi, O., Furman, A., Bundak, R., Gunoz, H., Darendeliler, F., Bas, F., 2006. Growth references for Turkish children aged 6 to 18 years. Acta Paediatrica 95, 1635-1641. doi:10.1080/08035250600652013
Bundak, R. et al. Body mass index references for Turkish children. Acta Paediatrica 95, 194-198 (2006).
lookupCategory<-read.csv("lookupCAtegory.csv")
ds<- read.csv("list19Apr23.csv") %>%
clean_names() %>%
mutate(across(where(is.character), stringr::str_to_upper)) %>%
mutate(across(where(is.character), stringr::str_squish)) %>%
mutate(date_of_death=if_else(date_of_death=="00/00/0000", "4/19/2023", date_of_death)) %>%
mutate(across(contains(c("date", "dte")), function(x) parse_date_time2(x, order=c("dBY" , "mdY", "dbY")))) %>%
mutate(dead=if_else(vital_status== "ALIVE(1)", 0, 1)) %>%
mutate(OS= as.numeric(difftime(date_of_death , date_of_diagnosis, units="days"))/30) %>%
mutate(date_of_birth=date_of_diagnosis-years(age_at_diagnosis)) %>%
mutate(nationality = ifelse(nationality=="JORDAN(01)", "Jordanian", "Nonjordanian")) %>%
mutate(site=str_extract(primary_site, "(?<=\\()([^)]+)(?=\\))")) %>%
mutate(histology = str_extract(histology_behavior_icd_o_3, "\\(\\d{4}"),
histology = str_remove(histology, "\\(")) %>%
mutate(record_number= str_extract(accession_seq, "(?<=/)\\d+")) %>%
mutate(year_of_diagnosis=year(date_of_diagnosis)) %>%
filter(year_of_diagnosis>=2010)
library(stringr)
# https://seer.cancer.gov/ayarecode/aya-who2008.html 4/4/2021
aya <- read.csv("ayarecodewho2008.txt", sep=";")
aya<- aya[-98,]
for (l in 1:nrow(aya)) if (aya[l,1]=="") aya[l,1]<-aya[l-1,1]
split<-function(x){
result<-c()
t<-unlist(strsplit(x, split=","))
for (b in t){
sapply(str_extract_all(b, '[0-9.]+'), function(x) as.numeric(x)) ->a
if (length(a)>1) a<-c(a[1,1]:a[2,1])
result<-c(result, a)}
return (result)
}
aya$site<-list(c())
for (l in 1:nrow(aya)) if (aya[l,3]!=" ") aya$site[[l]]<-split(aya[l,3])
aya$hist<-list(c())
for (l in 1:nrow(aya)) if (aya[l,4]!=" ") aya$hist[[l]]<-split(aya[l,4])
aya$category<-c("")
for (l in 1:nrow(aya)) if (!grepl("\\.", aya[l,1])) aya$category[l]=aya[l,1]
for (l in 1:nrow(aya)) if (aya[l,8]=="") aya[l,8]<-aya[l-1,8]
excludeEmpty<-which(aya$Primary.Site==" ")
aya %>% slice(-excludeEmpty)->aya
########################################
# patients
########################################
removeParenthesis<-function(x)gsub("\\s*\\([^\\)]+\\)","",as.character(x))
ds$site<- removeParenthesis(ds$site) %>% substr(start = 2, stop = 4) %>% as.numeric
ds$histology<- ds$histology %>% as.numeric
ds$aya<-c("")
ds$ayaCategory<-c("")
for (m in 1:nrow(ds)){
site<- ds$site[m]
histology<- ds$histology[m]
for (n in 1:nrow(aya)){
if (is.element(site, aya$site[[n]]) & is.element(histology, aya$hist[[n]])){
ds$aya[m] <-aya$SEER.AYA.Site.Recode.WHO.2008.Definition.Site.Group[n]
ds$ayaCategory[m]<-aya$category[n]
}}}
ds$aya<-stringr::str_squish(ds$aya)
ds<- ds%>% left_join(lookupCategory)
ds$category<- factor(ds$category, levels=c("Leukemia", "Lymphoma", "CNS", "Solid", "Bone", "STS"))
# ds<- ds %>% filter(aya!="", OS>=0) %>% mutate(Histology=as.character(Histology))
#
# ds$OS <- if_else(ds$Age<18 & ds$Nationality=="Jordanian" & ds$year>2015 & ds$aya=="1.1 Acute lymphoid leukemia", ds$OS*2, ds$OS)
gender<- read_csv("cancer_registry_1Feb2024.csv") %>% janitor::clean_names() %>%
dplyr::select(gender=sex, medical_record_number) %>%
distinct(medical_record_number, .keep_all = T)
ds<- left_join(ds, gender) %>% filter(!is.na(gender)) %>% filter(!is.na(category)) %>%
mutate(gender=recode(gender,
"1 (MALE)"="MALE",
"2 (FEMALE)"="FEMALE"))
vitals<- read.csv("Vitals.csv") %>%
clean_names()%>%
inner_join(ds[, c("mrn", "histology_behavior_icd_o_3", "date_of_birth", "date_of_diagnosis", "category", "ayaCategory", "aya","gender", "age_at_diagnosis", "OS", "dead")]) %>%
mutate(date=as.Date(mdy_hm(date_time_vitals_taken))) %>%
mutate(age=as.numeric(difftime(date, date_of_birth, units="days"))/360) %>%
mutate(months=as.numeric(difftime(date,date_of_diagnosis, units="days"))/30)
save(ds, file="ds.RData")
save(vitals, file="vitals.RData")
You will get these files ds.RData and vitals.RData
Just remove the hashtags# below and load the data to have 2 dataframes
ds and vitals
# load("ds.RData")
# load("vitals.RData")
HeightWeight<- vitals %>% filter(vital_type %in% c("HEIGHT", "WEIGHT"))
HeightWeight %>%
filter(vital_type=="WEIGHT") %>%
mutate( rate=as.numeric(rate), rate=0.45359237*rate) %>%
mutate(wfs=sds(rate, age = age, sex = gender, male = "MALE", female = "FEMALE",
ref = turkish.ref , item = "weight", type = "perc")) %>%
dplyr::select(weight=rate, date, age, months, wfs, mrn, category, gender, age_at_diagnosis, OS, dead) -> weight
HeightWeight %>%
filter(vital_type=="HEIGHT") %>%
mutate(rate=as.numeric(rate), rate=2.54*rate) %>%
mutate(hfs=sds(rate, age = age, sex = gender, male = "MALE", female = "FEMALE",
ref = turkish.ref , item = "height", type = "perc")) %>%
dplyr::select(height=rate, date, age, months, hfs, mrn, category, gender, age_at_diagnosis, OS, dead) ->height
full_join(height, weight) %>%
mutate(bmi=weight/((0.01*height)^2)) ->BMI
BMI<-BMI %>% mutate(bfs=sds(bmi, age = age, sex = gender, male = "MALE", female = "FEMALE",
ref = turkish.ref , item = "bmi_bundak", type = "perc")) %>%
dplyr::select( age, months,height, hfs, weight, wfs, bmi , bfs, mrn, category, gender, age_at_diagnosis, OS, dead) %>%
filter(age>=0, months>=0)
BMI<- BMI %>%
mutate(BMI_Categories = case_when(
bfs < 0.05 ~ "Underweight",
bfs >= 0.05 & bfs < 0.85 ~ "Healthy weight",
bfs >= 0.85 & bfs < 0.95 ~ "Overweight",
bfs >= 0.95 ~ "Obese",
TRUE ~ NA_character_)) %>%
mutate(Height_Categories = case_when(
hfs < 0.05 ~ "Short",
hfs >= 0.95 ~ "Tall",
TRUE ~ "Normal")) %>%
mutate(Weight_Categories = case_when(
wfs < 0.05 ~ "Low weight",
wfs >= 0.95 ~ "Increased Weight",
TRUE ~ "Normal"))
ds %>%
filter(mrn %in% unique(vitals$mrn)) %>%
mutate(var=category) %>% count(var) %>%
donut_chart()
ds %>%
filter(mrn %in% unique(vitals$mrn)) %>%
mutate(year=year(date_of_diagnosis)) %>%
dplyr::select(c("age_at_diagnosis","gender", "year", "category", "ayaCategory")) %>%
tbl_summary()
| Characteristic | N = 3,7491 |
|---|---|
| age_at_diagnosis | 6 (2, 12) |
| gender | |
| FEMALE | 1,619 (43%) |
| MALE | 2,130 (57%) |
| year | 2,016.0 (2,014.0, 2,019.0) |
| category | |
| Leukemia | 1,042 (28%) |
| Lymphoma | 619 (17%) |
| CNS | 560 (15%) |
| Solid | 1,065 (28%) |
| Bone | 250 (6.7%) |
| STS | 213 (5.7%) |
| ayaCategory | |
| 1 Leukemias | 1,042 (28%) |
| 10 Unspecified Malignant Neoplasms | 10 (0.3%) |
| 2 Lymphomas | 619 (17%) |
| 3 CNS and Other Intracranial and Intraspinal Neoplasms (all behaviors) | 560 (15%) |
| 4 Osseous & Chondromatous Neoplasms | 250 (6.7%) |
| 5 Soft Tissue Sarcomas | 213 (5.7%) |
| 6 Germ Cell and Trophoblastic Neoplasms | 100 (2.7%) |
| 7 Melanoma and Skin Carcinomas | 15 (0.4%) |
| 8 Carcinomas | 111 (3.0%) |
| 9 Miscellaneous specified neoplasms, NOS | 829 (22%) |
| 1 Median (IQR); n (%) | |
ds %>%
mutate(year=as.character(year(date_of_diagnosis))) %>% count(year) %>% gt()
| year | n |
|---|---|
| 2010 | 342 |
| 2011 | 384 |
| 2012 | 362 |
| 2013 | 373 |
| 2014 | 364 |
| 2015 | 384 |
| 2016 | 397 |
| 2017 | 383 |
| 2018 | 293 |
| 2019 | 336 |
| 2020 | 336 |
| 2021 | 362 |
HeightWeight %>%
group_by(mrn) %>%
summarise(n=n()) %>%
summarise(Total=sum(n),
median=median(n),
q25 = quantile(n, probs = 0.25, na.rm = TRUE), # 25th percentile
q75 = quantile(n, probs = 0.75, na.rm = TRUE) # 75th percentile
) %>% gt()
| Total | median | q25 | q75 |
|---|---|---|---|
| 337589 | 66 | 26 | 130 |
Distribution lines of BMI, height and weight of children with cancer at diagnosis showing bimodal distribution of weight/BMI and mainly right-skewed curve for height
weight %>% filter(!is.na(weight))%>% arrange(months) %>% group_by(mrn) %>% slice(1) -> weight_at_diagnosis
height %>% filter(!is.na(height))%>% arrange(months) %>% group_by(mrn) %>% slice(1) -> height_at_diagnosis
BMI %>% filter(!is.na(bmi))%>% arrange(months) %>% group_by(mrn) %>% slice(1) -> BMI_at_diagnosis
# Assuming BMI_at_diagnosis, weight_at_diagnosis, and height_at_diagnosis are your data frames
# Combine them while dropping duplicated columns
combined_df <- BMI_at_diagnosis %>%
full_join(weight_at_diagnosis, by = intersect(names(BMI_at_diagnosis), names(weight_at_diagnosis))) %>%
full_join(height_at_diagnosis, by = intersect(names(BMI_at_diagnosis), names(height_at_diagnosis)))
# Note: This approach joins the data frames on columns they have in common and avoids duplicating columns in the final data frame.
combined_df[, Cs(category, wfs, hfs, bfs)] %>%
tidyr::gather(metric, val, -category) %>%
ggplot(aes(x=val, fill=metric))+
geom_density(alpha=0.3)+
theme_classic()+
scale_x_continuous(labels = scales::percent_format(scale = 100))+
labs(x="Percentile for Age at Diagnosis", y="Distribution Density", fill="Metric", title="All patients combined")+
scale_fill_discrete(labels=c("BMI", "Height", "Weight"))+
theme(strip.background = element_blank(), axis.text.x = element_text(size=8))
combined_df[, Cs(category, wfs, hfs, bfs)] %>%
tidyr::gather(metric, val, -category) %>%
ggplot(aes(x=val, fill=metric))+
geom_density(alpha=0.3)+
theme_classic()+
scale_x_continuous(labels = scales::percent_format(scale = 100))+
labs(x="Percentile for Age at Diagnosis", y="Distribution Density", fill="Metric")+
scale_fill_discrete(labels=c("BMI", "Height", "Weight"))+
theme(strip.background = element_blank(), axis.text.x = element_text(size=8))+
facet_wrap(.~category, scales="free")
weight %>% filter(!is.na(weight))%>% filter(months>=12) %>% arrange(months) %>% group_by(mrn) %>% slice(1) -> weight_after_1
height %>% filter(!is.na(height))%>% filter(months>=12) %>% arrange(months) %>% group_by(mrn) %>% slice(1) -> height_after_1
BMI %>% filter(!is.na(bmi))%>% filter(months>=12) %>% arrange(months) %>% group_by(mrn) %>% slice(1) -> BMI_after_1
# Assuming BMI_after_1, weight_after_1, and height_after_1 are your data frames
# Combine them while dropping duplicated columns
combined_df <- BMI_after_1 %>%
full_join(weight_after_1, by = intersect(names(BMI_after_1), names(weight_after_1))) %>%
full_join(height_after_1, by = intersect(names(BMI_after_1), names(height_after_1)))
# Note: This approach j
combined_df[, Cs(category, wfs, hfs, bfs)] %>%
tidyr::gather(metric, val, -category) %>%
ggplot(aes(x=val, fill=metric))+
geom_density(alpha=0.3)+
theme_classic()+
scale_x_continuous(labels = scales::percent_format(scale = 100))+
labs(x="Percentile for Age at Diagnosis", y="Distribution Density", fill="Metric")+
scale_fill_discrete(labels=c("BMI", "Height", "Weight"))+
theme(strip.background = element_blank(), axis.text.x = element_text(size=8))+
facet_wrap(.~category, scales="free")
weight %>% filter(!is.na(weight))%>% filter(months>=60) %>% arrange(months) %>% group_by(mrn) %>% slice(1) -> weight_after_5
height %>% filter(!is.na(height))%>% filter(months>=60) %>% arrange(months) %>% group_by(mrn) %>% slice(1) -> height_after_5
BMI %>% filter(!is.na(bmi))%>% filter(months>=60) %>% arrange(months) %>% group_by(mrn) %>% slice(1) -> BMI_after_5
# Assuming BMI_after_5, weight_after_5, and height_after_5 are your data frames
# Combine them while dropping duplicated columns
combined_df <- BMI_after_5 %>%
full_join(weight_after_5, by = intersect(names(BMI_after_5), names(weight_after_5))) %>%
full_join(height_after_5, by = intersect(names(BMI_after_5), names(height_after_5)))
# Note: This approach j
combined_df[, Cs(category, wfs, hfs, bfs)] %>%
tidyr::gather(metric, val, -category) %>%
ggplot(aes(x=val, fill=metric))+
geom_density(alpha=0.3)+
theme_classic()+
scale_x_continuous(labels = scales::percent_format(scale = 100))+
labs(x="Percentile for Age at Diagnosis", y="Distribution Density", fill="Metric")+
scale_fill_discrete(labels=c("BMI", "Height", "Weight"))+
theme(strip.background = element_blank(), axis.text.x = element_text(size=8))+
facet_wrap(.~category, scales="free")
BMI %>%
filter(!is.na(bmi)) %>%
arrange(months) %>%
group_by(mrn) %>%
slice(1) %>%
dplyr::select(category, BMI_Categories, Height_Categories, Weight_Categories ) %>%
tbl_summary(by=category) %>% add_overall()
| Characteristic | Overall, N = 3,6761 | Leukemia, N = 1,0351 | Lymphoma, N = 6111 | CNS, N = 5401 | Solid, N = 1,0421 | Bone, N = 2421 | STS, N = 2061 |
|---|---|---|---|---|---|---|---|
| mrn | 128,195 (89,131, 173,040) | 131,089 (85,257, 176,207) | 128,399 (91,172, 173,283) | 129,276 (92,634, 173,207) | 125,482 (88,878, 171,117) | 128,709 (88,431, 169,525) | 131,222 (94,242, 166,695) |
| BMI_Categories | |||||||
| Healthy weight | 1,186 (63%) | 337 (65%) | 295 (63%) | 200 (65%) | 171 (61%) | 126 (58%) | 57 (60%) |
| Obese | 260 (14%) | 68 (13%) | 69 (15%) | 44 (14%) | 38 (14%) | 25 (11%) | 16 (17%) |
| Overweight | 197 (10%) | 57 (11%) | 40 (8.5%) | 37 (12%) | 25 (9.0%) | 30 (14%) | 8 (8.4%) |
| Underweight | 245 (13%) | 53 (10%) | 67 (14%) | 28 (9.1%) | 45 (16%) | 38 (17%) | 14 (15%) |
| Unknown | 1,788 | 520 | 140 | 231 | 763 | 23 | 111 |
| Height_Categories | |||||||
| Normal | 3,297 (90%) | 931 (90%) | 518 (85%) | 464 (86%) | 989 (95%) | 210 (87%) | 185 (90%) |
| Short | 263 (7.2%) | 68 (6.6%) | 67 (11%) | 51 (9.4%) | 37 (3.6%) | 25 (10%) | 15 (7.3%) |
| Tall | 116 (3.2%) | 36 (3.5%) | 26 (4.3%) | 25 (4.6%) | 16 (1.5%) | 7 (2.9%) | 6 (2.9%) |
| Weight_Categories | |||||||
| Increased Weight | 295 (8.0%) | 80 (7.7%) | 78 (13%) | 51 (9.4%) | 39 (3.7%) | 28 (12%) | 19 (9.2%) |
| Low weight | 246 (6.7%) | 46 (4.4%) | 66 (11%) | 39 (7.2%) | 42 (4.0%) | 37 (15%) | 16 (7.8%) |
| Normal | 3,135 (85%) | 909 (88%) | 467 (76%) | 450 (83%) | 961 (92%) | 177 (73%) | 171 (83%) |
| 1 Median (IQR); n (%) | |||||||
BMI %>%
filter(!is.na(bmi)) %>%
filter(months>=12) %>%
arrange(months) %>%
group_by(mrn) %>%
slice(1) %>%
ungroup() %>%
dplyr::select(category, BMI_Categories, Height_Categories, Weight_Categories ) %>%
tbl_summary(by=category) %>% add_overall()
| Characteristic | Overall, N = 2,9161 | Leukemia, N = 8911 | Lymphoma, N = 5271 | CNS, N = 3721 | Solid, N = 7781 | Bone, N = 1881 | STS, N = 1601 |
|---|---|---|---|---|---|---|---|
| BMI_Categories | |||||||
| Healthy weight | 961 (62%) | 269 (58%) | 255 (62%) | 164 (68%) | 127 (60%) | 91 (58%) | 55 (77%) |
| Obese | 246 (16%) | 93 (20%) | 71 (17%) | 24 (10%) | 28 (13%) | 24 (15%) | 6 (8.5%) |
| Overweight | 210 (14%) | 74 (16%) | 55 (13%) | 30 (13%) | 27 (13%) | 20 (13%) | 4 (5.6%) |
| Underweight | 137 (8.8%) | 30 (6.4%) | 28 (6.8%) | 22 (9.2%) | 28 (13%) | 23 (15%) | 6 (8.5%) |
| Unknown | 1,362 | 425 | 118 | 132 | 568 | 30 | 89 |
| Height_Categories | |||||||
| Normal | 2,607 (89%) | 809 (91%) | 447 (85%) | 308 (83%) | 728 (94%) | 167 (89%) | 148 (93%) |
| Short | 248 (8.5%) | 64 (7.2%) | 62 (12%) | 53 (14%) | 39 (5.0%) | 20 (11%) | 10 (6.3%) |
| Tall | 61 (2.1%) | 18 (2.0%) | 18 (3.4%) | 11 (3.0%) | 11 (1.4%) | 1 (0.5%) | 2 (1.3%) |
| Weight_Categories | |||||||
| Increased Weight | 264 (9.1%) | 96 (11%) | 82 (16%) | 28 (7.5%) | 30 (3.9%) | 23 (12%) | 5 (3.1%) |
| Low weight | 175 (6.0%) | 39 (4.4%) | 29 (5.5%) | 34 (9.1%) | 37 (4.8%) | 26 (14%) | 10 (6.3%) |
| Normal | 2,477 (85%) | 756 (85%) | 416 (79%) | 310 (83%) | 711 (91%) | 139 (74%) | 145 (91%) |
| 1 n (%) | |||||||
BMI%>%
filter(!is.na(bmi)) %>%
filter(months>=60) %>%
arrange(months) %>%
group_by(mrn) %>%
slice(1) %>%
ungroup() %>%
dplyr::select(category, BMI_Categories, Height_Categories, Weight_Categories ) %>%
tbl_summary(by=category) %>% add_overall()
| Characteristic | Overall, N = 1,0961 | Leukemia, N = 3901 | Lymphoma, N = 1851 | CNS, N = 1731 | Solid, N = 2501 | Bone, N = 581 | STS, N = 401 |
|---|---|---|---|---|---|---|---|
| BMI_Categories | |||||||
| Healthy weight | 491 (61%) | 195 (60%) | 62 (60%) | 96 (66%) | 99 (62%) | 22 (63%) | 17 (55%) |
| Obese | 158 (20%) | 72 (22%) | 21 (20%) | 25 (17%) | 26 (16%) | 8 (23%) | 6 (19%) |
| Overweight | 103 (13%) | 48 (15%) | 12 (12%) | 14 (9.6%) | 20 (13%) | 2 (5.7%) | 7 (23%) |
| Underweight | 48 (6.0%) | 10 (3.1%) | 8 (7.8%) | 11 (7.5%) | 15 (9.4%) | 3 (8.6%) | 1 (3.2%) |
| Unknown | 296 | 65 | 82 | 27 | 90 | 23 | 9 |
| Height_Categories | |||||||
| Normal | 908 (83%) | 334 (86%) | 163 (88%) | 122 (71%) | 206 (82%) | 51 (88%) | 32 (80%) |
| Short | 141 (13%) | 37 (9.5%) | 17 (9.2%) | 45 (26%) | 29 (12%) | 7 (12%) | 6 (15%) |
| Tall | 47 (4.3%) | 19 (4.9%) | 5 (2.7%) | 6 (3.5%) | 15 (6.0%) | 0 (0%) | 2 (5.0%) |
| Weight_Categories | |||||||
| Increased Weight | 156 (14%) | 71 (18%) | 21 (11%) | 22 (13%) | 29 (12%) | 7 (12%) | 6 (15%) |
| Low weight | 79 (7.2%) | 15 (3.8%) | 12 (6.5%) | 27 (16%) | 17 (6.8%) | 5 (8.6%) | 3 (7.5%) |
| Normal | 861 (79%) | 304 (78%) | 152 (82%) | 124 (72%) | 204 (82%) | 46 (79%) | 31 (78%) |
| 1 n (%) | |||||||
I made new figures … look after these 2 bar charts
BMI %>%
mutate(BMI_Categories=factor(BMI_Categories, levels=c("Underweight", "Healthy weight", "Overweight", "Obese"))) %>%
mutate(year=cut(months, breaks=seq(0,120,12), labels=1:10)) %>%
filter(!is.na(BMI_Categories), !is.na(year)) %>%
ggplot(aes(x=year, fill=BMI_Categories))+
geom_bar(position="fill")+
# scale_x_continuous(limits = c(0, 180), breaks = seq(0, 180, 12), labels=seq(0,15,1))+
theme_classic()+
labs(x="Years after diagnosis", y="Proportion", fill="Weight categories")
BMI %>%
mutate(BMI_Categories=factor(BMI_Categories, levels=c("Underweight", "Healthy weight", "Overweight", "Obese"))) %>%
mutate(year=cut(months, breaks=seq(0,120,12), labels=1:10)) %>%
filter(!is.na(BMI_Categories), !is.na(year)) %>%
ggplot(aes(x=year, fill=BMI_Categories))+
geom_bar(position="fill")+
# scale_x_continuous(limits = c(0, 180), breaks = seq(0, 180, 12), labels=seq(0,15,1))+
theme_classic()+
labs(x="Years after diagnosis", y="Proportion", fill="Weight categories")+
facet_wrap(~category)
BMI %>%
mutate(BMI_Categories=factor(BMI_Categories, levels=c("Underweight", "Healthy weight", "Overweight", "Obese"))) %>%
mutate(year=cut(months, breaks=seq(0,120,12), labels=1:10)) %>%
filter(!is.na(BMI_Categories), !is.na(year)) %>%
group_by(year, mrn) %>%
summarise(bfs=mean(bfs)) %>%
mutate(BMI_Categories = case_when(
bfs < 0.05 ~ "Underweight",
bfs >= 0.05 & bfs < 0.85 ~ "Healthy weight",
bfs >= 0.85 & bfs < 0.95 ~ "Overweight",
bfs >= 0.95 ~ "Obese",
TRUE ~ NA_character_)) %>%
mutate(BMI_Categories=factor(BMI_Categories, levels=c("Healthy weight", "Obese", "Overweight", "Underweight"))) %>%
group_by(year) %>%
count(BMI_Categories) %>%
mutate(total=sum(n), prcnt=n/total) %>%
ungroup() %>%
ggplot(aes(x=year, color=BMI_Categories, group=BMI_Categories, y=prcnt))+
geom_point(show.legend = FALSE)+
stat_smooth(method = "lm", formula='y ~ x', se = T)+
stat_cor(aes(color = BMI_Categories), method = "spearman", label.x=3.5, label.y=c(0.42,0.38,0.335 ,0.29), )+
# scale_x_continuous(limits = c(0, 180), breaks = seq(0, 180, 12), labels=seq(0,15,1))+
theme_classic()+
labs(x="Years after diagnosis", y="Proportion", fill="Weight categories")+
theme(legend.position = c(0.2,0.5))
BMI %>%
mutate(BMI_Categories=factor(BMI_Categories, levels=c("Underweight", "Healthy weight", "Overweight", "Obese"))) %>%
mutate(year=cut(months, breaks=seq(0,120,12), labels=1:10)) %>%
filter(!is.na(BMI_Categories), !is.na(year)) %>%
group_by(category, year, mrn) %>%
summarise(bfs=mean(bfs)) %>%
mutate(BMI_Categories = case_when(
bfs < 0.05 ~ "Underweight",
bfs >= 0.05 & bfs < 0.85 ~ "Healthy weight",
bfs >= 0.85 & bfs < 0.95 ~ "Overweight",
bfs >= 0.95 ~ "Obese",
TRUE ~ NA_character_)) %>%
mutate(BMI_Categories=factor(BMI_Categories, levels=c("Healthy weight", "Obese", "Overweight", "Underweight"))) %>%
group_by(category, year) %>%
count(BMI_Categories) %>%
mutate(total=sum(n), prcnt=n/total) %>%
ungroup() %>%
ggplot(aes(x=year, color=BMI_Categories, group=BMI_Categories, y=prcnt))+
geom_point(show.legend = FALSE)+
stat_smooth(method = "lm", formula='y ~ x', se = T)+
stat_cor(aes(color = BMI_Categories), method = "spearman", label.x=3.5 )+
# scale_x_continuous(limits = c(0, 180), breaks = seq(0, 180, 12), labels=seq(0,15,1))+
theme_classic()+
labs(x="Years after diagnosis", y="Proportion", fill="Weight categories")+
theme(legend.position = "none")+
facet_wrap(~category)
BMI %>% filter(!is.na(bmi))%>% arrange(months) %>% group_by(mrn) %>% slice(1) %>% mutate(time="Dx") -> BMI_at_diagnosis
BMI %>% filter(!is.na(bmi))%>% filter(months>=60) %>% arrange(months) %>% group_by(mrn) %>% slice(1) %>%
mutate(time="5-years") -> BMI_at_5
BMI %>% filter(!is.na(bmi))%>% filter(months>=120) %>% arrange(months) %>% group_by(mrn) %>% slice(1) %>% mutate(time="10-years") -> BMI_at_10
combined_df<-dplyr::bind_rows(BMI_at_diagnosis,BMI_at_5,BMI_at_10)
combined_df %>%
filter(bmi<80) %>%
dplyr::select(time, bmi, category, mrn) %>%
mutate(time=factor(time, levels=c("Dx", "5-years", "10-years"))) %>%
ggplot(aes(x = time, y = bmi, group = mrn)) +
geom_line(alpha=0.5, color = "grey") +
geom_point(size=0.1)+
theme_classic()+
facet_wrap(~category)+
labs(y="BMI", x="Timing of Readings")
theme(strip.background = element_blank())
## List of 1
## $ strip.background: list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
To compare BMI at diagnosis and at 5 years for 1096 patients with both readings available, a paired t test showed statistically significant difference in the mean with mean difference of 3.3 (95% CI, 1.59-4.97, p<0.001)
a<- combined_df %>% filter(time!="10-years") %>% filter(duplicated(mrn) | duplicated(mrn, fromLast = TRUE))
t.test(bmi~time, data=a , paired=T)
##
## Paired t-test
##
## data: bmi by time
## t = 3.8192, df = 1095, p-value = 0.0001414
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 1.596831 4.971208
## sample estimates:
## mean difference
## 3.284019
To compare BMI at diagnosis and at 5 years for 179 patients with both readings available, a paired t test showed statistically significant difference in the mean with mean difference of 3.2 (95% CI, 2.63-3.76, p<0.001)
a<- combined_df %>% filter(time!="Dx") %>% filter(duplicated(mrn) | duplicated(mrn, fromLast = TRUE))
t.test(bmi~time, data=a , paired=T)
##
## Paired t-test
##
## data: bmi by time
## t = 11.108, df = 178, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 2.626559 3.761433
## sample estimates:
## mean difference
## 3.193996
sp <- ggscatter(BMI %>% filter(bmi<80, months<120), x = "months", y = "bmi",
palette = "gray", alpha=0.01, size=1,
add = "reg.line", conf.int = TRUE)
sp + stat_cor(method = "spearman", label.x = 80)
sp <- ggscatter(BMI %>% filter(bmi<80, months<120), x = "months", y = "bmi",
color = "category", palette = "jco", alpha=0.03, size=1,
add = "reg.line", conf.int = TRUE)
sp + stat_cor(aes(color = category), method = "spearman", label.x = 80)
This logistic regression analysis aimed to identify predictors of obesity five years following the diagnosis of childhood cancer, considering factors such as gender, age at diagnosis, cancer category, and initial BMI categories. The findings indicated that age at diagnosis and initial BMI category were significant predictors of later obesity. Specifically, younger children at the time of cancer diagnosis were more likely to become obese, with a notable decrease in the odds of obesity for each additional year of age at diagnosis (multivariable OR = 0.78, p < 0.001). Furthermore, children categorized as overweight or obese at diagnosis had significantly higher odds of being obese five years later (multivariable ORs = 4.34 and 8.91, respectively, p < 0.001 for both), highlighting the strong predictive value of initial BMI status. In contrast, gender and specific cancer categories did not maintain significant associations with obesity in the multivariable analysis, indicating that these factors may be less critical in predicting long-term obesity outcomes in this population.
BMI_at_5 %>% filter(bfs>0.95) %>% pull(mrn)->ObeseMRN
BMI_at_diagnosis$Obesity<-ifelse(BMI_at_diagnosis$mrn %in% ObeseMRN, "YES", "NO")
BMI_at_diagnosis$Obesity<- as.factor(BMI_at_diagnosis$Obesity)
BMI_at_diagnosis$category<- forcats::fct_relevel(BMI_at_diagnosis$category, "Solid")
dependent = "Obesity"
explanatory=c("gender", "age_at_diagnosis", "category", "BMI_Categories")
explanatory_multi=c("age_at_diagnosis", "category", "BMI_Categories")
BMI_at_diagnosis$gender=factor(BMI_at_diagnosis$gender, levels=c("MALE", "FEMALE"))
BMI_at_diagnosis %>% finalfit(dependent, explanatory, explanatory_multi) %>% gt()
| Dependent: Obesity | NO | YES | OR (univariable) | OR (multivariable) | |
|---|---|---|---|---|---|
| gender | MALE | 1991 (95.4) | 95 (4.6) | - | - |
| FEMALE | 1527 (96.0) | 63 (4.0) | 0.86 (0.62-1.19, p=0.381) | - | |
| age_at_diagnosis | Mean (SD) | 7.1 (5.5) | 5.1 (3.4) | 0.93 (0.90-0.96, p<0.001) | 0.78 (0.72-0.84, p<0.001) |
| category | Solid | 1015 (97.4) | 27 (2.6) | - | - |
| Leukemia | 963 (93.0) | 72 (7.0) | 2.81 (1.81-4.48, p<0.001) | 1.48 (0.68-3.50, p=0.340) | |
| Lymphoma | 590 (96.6) | 21 (3.4) | 1.34 (0.74-2.38, p=0.324) | 1.22 (0.51-3.11, p=0.661) | |
| CNS | 515 (95.4) | 25 (4.6) | 1.82 (1.04-3.18, p=0.033) | 0.78 (0.30-2.05, p=0.607) | |
| Bone | 234 (96.7) | 8 (3.3) | 1.29 (0.54-2.74, p=0.540) | 1.03 (0.30-3.21, p=0.966) | |
| STS | 201 (97.6) | 5 (2.4) | 0.94 (0.31-2.26, p=0.892) | 1.13 (0.23-4.19, p=0.868) | |
| BMI_Categories | Healthy weight | 1162 (98.0) | 24 (2.0) | - | - |
| Obese | 231 (88.8) | 29 (11.2) | 6.08 (3.48-10.71, p<0.001) | 8.91 (4.95-16.24, p<0.001) | |
| Overweight | 180 (91.4) | 17 (8.6) | 4.57 (2.37-8.63, p<0.001) | 4.34 (2.21-8.36, p<0.001) | |
| Underweight | 245 (100.0) | 0.00 (0.00-41526.24, p=0.982) | 0.00 (0.00-13494.57, p=0.981) |
BMI_at_5 %>% filter(hfs<0.05) %>% pull(mrn)->ShortMRN
BMI_at_diagnosis$short<-ifelse(BMI_at_diagnosis$mrn %in% ShortMRN, "YES", "NO")
BMI_at_diagnosis$short<- as.factor(BMI_at_diagnosis$short)
BMI_at_diagnosis$category<- forcats::fct_relevel(BMI_at_diagnosis$category, "Solid")
BMI_at_diagnosis$gender=factor(BMI_at_diagnosis$gender, levels=c("MALE", "FEMALE"))
dependent = "short"
explanatory=c("gender", "age_at_diagnosis", "category", "Height_Categories")
BMI_at_diagnosis %>% finalfit(dependent, explanatory) %>% gt()
| Dependent: short | NO | YES | OR (univariable) | OR (multivariable) | |
|---|---|---|---|---|---|
| gender | MALE | 2021 (96.9) | 65 (3.1) | - | - |
| FEMALE | 1514 (95.2) | 76 (4.8) | 1.56 (1.11-2.19, p=0.010) | 1.55 (1.10-2.21, p=0.013) | |
| age_at_diagnosis | Mean (SD) | 7.1 (5.5) | 6.7 (3.3) | 0.99 (0.96-1.02, p=0.415) | 0.92 (0.89-0.96, p<0.001) |
| category | Solid | 1013 (97.2) | 29 (2.8) | - | - |
| Leukemia | 996 (96.2) | 39 (3.8) | 1.37 (0.84-2.25, p=0.209) | 1.48 (0.89-2.46, p=0.130) | |
| Lymphoma | 594 (97.2) | 17 (2.8) | 1.00 (0.53-1.81, p=0.999) | 1.14 (0.58-2.21, p=0.694) | |
| CNS | 498 (92.2) | 42 (7.8) | 2.95 (1.82-4.83, p<0.001) | 3.05 (1.84-5.12, p<0.001) | |
| Bone | 235 (97.1) | 7 (2.9) | 1.04 (0.42-2.27, p=0.926) | 1.26 (0.48-2.98, p=0.612) | |
| STS | 199 (96.6) | 7 (3.4) | 1.23 (0.49-2.69, p=0.630) | 1.35 (0.53-3.02, p=0.495) | |
| Height_Categories | Normal | 3201 (97.1) | 96 (2.9) | - | - |
| Short | 218 (82.9) | 45 (17.1) | 6.88 (4.67-10.01, p<0.001) | 10.01 (6.34-15.83, p<0.001) | |
| Tall | 116 (100.0) | 0.00 (0.00-0.00, p=0.969) | 0.00 (0.00-0.00, p=0.969) |
# percentiles 1-5 year after diagnosis
BMI %>%
filter(age>=0, gender=="MALE") %>%
arrange(months) %>%
group_by(mrn) %>% slice(1) %>%
mutate(ageCategory=cut(age, breaks=c(0:30), labels=seq(0,29,1), right=F)) %>%
mutate(yearsAfterDiagnosis=months/12) %>%
mutate(yearsCategory=cut(yearsAfterDiagnosis, breaks=c(0:15), labels=c(1:15))) %>%
group_by(ageCategory) %>%
summarise(Patients=n_distinct(mrn), readings=n(),
lowerWeightPercentile=quantile(weight, 0.10,na.rm=T), upperWeightPercentile=quantile(weight, 0.90,na.rm=T),
lowerHeightPercentile=quantile(height, 0.10,na.rm=T), upperHeightPercentile=quantile(height, 0.90,na.rm=T),
lowerBMIPercentile=quantile(bmi, 0.10,na.rm=T), upperBMIPercentile=quantile(bmi, 0.90,na.rm=T))
| ageCategory | Patients | readings | lowerWeightPercentile | upperWeightPercentile | lowerHeightPercentile | upperHeightPercentile | lowerBMIPercentile | upperBMIPercentile |
|---|---|---|---|---|---|---|---|---|
| 0 | 144 | 144 | 5.6 | 11 | 55.7 | 78.3 | 14.7 | 20.7 |
| 1 | 170 | 170 | 9 | 13.7 | 75.1 | 88 | 14.6 | 19.6 |
| 2 | 173 | 173 | 11.7 | 16.7 | 83 | 99 | 14.7 | 18.7 |
| 3 | 163 | 163 | 12.9 | 19.2 | 92 | 108 | 14 | 18.3 |
| 4 | 170 | 170 | 15 | 22 | 99 | 113 | 14.1 | 17.9 |
| 5 | 154 | 154 | 16 | 26.9 | 104 | 120 | 13.7 | 18.8 |
| 6 | 116 | 116 | 17.7 | 27.5 | 110 | 125 | 13.6 | 18.3 |
| 7 | 96 | 96 | 19 | 32.2 | 114 | 131 | 13.6 | 19.5 |
| 8 | 80 | 80 | 21.4 | 32.3 | 121 | 138 | 13.6 | 19.7 |
| 9 | 97 | 97 | 24.1 | 44.1 | 125 | 143 | 14.2 | 22.6 |
| 10 | 83 | 83 | 26 | 52.3 | 128 | 147 | 14.3 | 24.1 |
| 11 | 86 | 86 | 28.7 | 59.4 | 135 | 157 | 14.9 | 27.3 |
| 12 | 83 | 83 | 30.9 | 63.7 | 141 | 164 | 15.1 | 25.7 |
| 13 | 88 | 88 | 33.5 | 76 | 143 | 168 | 15.1 | 28.6 |
| 14 | 72 | 72 | 38.4 | 81.8 | 152 | 178 | 16 | 28.1 |
| 15 | 109 | 109 | 43.8 | 86 | 158 | 178 | 16.8 | 30.6 |
| 16 | 77 | 77 | 46.3 | 93.6 | 161 | 178 | 17.5 | 30.9 |
| 17 | 109 | 109 | 51.6 | 96.2 | 163 | 179 | 18.3 | 31.4 |
| 18 | 21 | 21 | 51.9 | 91.5 | 163 | 177 | 18.8 | 28.9 |
| 19 | 7 | 7 | 51.4 | 74 | 169 | 181 | 17.5 | 23.4 |
| 20 | 7 | 7 | 49.2 | 86.3 | 169 | 179 | 20.6 | 27.8 |
| 21 | 2 | 2 | 63 | 63 | 170 | 170 | 21.8 | 21.8 |
| 22 | 3 | 3 | 79 | 87.8 | 176 | 177 | 25.8 | 28.4 |
| 23 | 1 | 1 | 59 | 59 | 173 | 173 | 19.7 | 19.7 |
# percentiles 1-5 year after diagnosis
BMI %>%
filter(age>=0, gender=="FEMALE") %>%
arrange(months) %>%
group_by(mrn) %>% slice(1) %>%
mutate(ageCategory=cut(age, breaks=c(0:30), labels=seq(0,29,1), right=F)) %>%
mutate(yearsAfterDiagnosis=months/12) %>%
mutate(yearsCategory=cut(yearsAfterDiagnosis, breaks=c(0:15), labels=c(1:15))) %>%
group_by(ageCategory) %>%
summarise(Patients=n_distinct(mrn), readings=n(),
lowerWeightPercentile=quantile(weight, 0.10,na.rm=T), upperWeightPercentile=quantile(weight, 0.90,na.rm=T),
lowerHeightPercentile=quantile(height, 0.10,na.rm=T), upperHeightPercentile=quantile(height, 0.90,na.rm=T),
lowerBMIPercentile=quantile(bmi, 0.10,na.rm=T), upperBMIPercentile=quantile(bmi, 0.90,na.rm=T))
| ageCategory | Patients | readings | lowerWeightPercentile | upperWeightPercentile | lowerHeightPercentile | upperHeightPercentile | lowerBMIPercentile | upperBMIPercentile |
|---|---|---|---|---|---|---|---|---|
| 0 | 120 | 120 | 4.39 | 10 | 50 | 76 | 14.2 | 19.8 |
| 1 | 134 | 134 | 8.91 | 13.4 | 72.9 | 87.1 | 15 | 19.3 |
| 2 | 148 | 148 | 11 | 15.9 | 84 | 97.5 | 14.2 | 18.1 |
| 3 | 126 | 126 | 12.6 | 18.1 | 91 | 104 | 13.4 | 18.6 |
| 4 | 132 | 132 | 13.5 | 20.1 | 98 | 109 | 13.6 | 18.2 |
| 5 | 96 | 96 | 15.1 | 23.7 | 103 | 118 | 13.1 | 18 |
| 6 | 75 | 75 | 16.8 | 26.9 | 106 | 124 | 14.1 | 18.5 |
| 7 | 84 | 84 | 18.9 | 31.6 | 116 | 129 | 13.4 | 20.1 |
| 8 | 53 | 53 | 20.8 | 34.1 | 119 | 138 | 13.6 | 19 |
| 9 | 64 | 64 | 24 | 44.8 | 125 | 143 | 13.6 | 23.4 |
| 10 | 61 | 61 | 25 | 50 | 129 | 150 | 14.5 | 24.6 |
| 11 | 58 | 58 | 26.9 | 58.9 | 131 | 155 | 14.3 | 24.5 |
| 12 | 72 | 72 | 33.7 | 60.1 | 143 | 162 | 15 | 25.5 |
| 13 | 70 | 70 | 36.9 | 59.7 | 146 | 163 | 15.9 | 24.3 |
| 14 | 61 | 61 | 39.5 | 74.2 | 146 | 164 | 17.6 | 28.3 |
| 15 | 68 | 68 | 44.6 | 82.5 | 151 | 167 | 17.8 | 30 |
| 16 | 71 | 71 | 41.8 | 70 | 150 | 167 | 17.8 | 27.7 |
| 17 | 78 | 78 | 42.3 | 81.8 | 152 | 168 | 17.5 | 29.2 |
| 18 | 18 | 18 | 47.3 | 88.7 | 156 | 171 | 18.6 | 33.2 |
| 19 | 7 | 7 | 47.8 | 74 | 157 | 166 | 19.2 | 27.4 |
| 20 | 4 | 4 | 55.4 | 112 | 164 | 169 | 22.8 | 41.9 |
| 21 | 6 | 6 | 50 | 76.5 | 157 | 166 | 19.7 | 27.7 |
| 22 | 1 | 1 | 49 | 49 | 161 | 161 | 18.9 | 18.9 |
| 24 | 2 | 2 | 45.9 | 61.1 | 163 | 167 | 16.6 | 22.9 |
| 26 | 1 | 1 | 58 | 58 | 153 | 153 | 24.8 | 24.8 |
# percentiles 1-5 year after diagnosis
BMI %>%
filter(age>=0, months>=12, months<48, gender=="MALE") %>%
mutate(ageCategory=cut(age, breaks=c(0:30), labels=seq(0,29,1), right=F)) %>%
mutate(yearsAfterDiagnosis=months/12) %>%
mutate(yearsCategory=cut(yearsAfterDiagnosis, breaks=c(0:15), labels=c(1:15))) %>%
group_by(ageCategory) %>%
summarise(Patients=n_distinct(mrn), readings=n(),
lowerWeightPercentile=quantile(weight, 0.10,na.rm=T), upperWeightPercentile=quantile(weight, 0.90,na.rm=T),
lowerHeightPercentile=quantile(height, 0.10,na.rm=T), upperHeightPercentile=quantile(height, 0.90,na.rm=T),
lowerBMIPercentile=quantile(bmi, 0.10,na.rm=T), upperBMIPercentile=quantile(bmi, 0.90,na.rm=T))
| ageCategory | Patients | readings | lowerWeightPercentile | upperWeightPercentile | lowerHeightPercentile | upperHeightPercentile | lowerBMIPercentile | upperBMIPercentile |
|---|---|---|---|---|---|---|---|---|
| 1 | 117 | 1515 | 9 | 14.4 | 76 | 91 | 15.1 | 20 |
| 2 | 208 | 2761 | 11.7 | 18.7 | 84 | 100 | 15.3 | 20.2 |
| 3 | 303 | 3517 | 13.5 | 20.8 | 91.5 | 107 | 15.1 | 19.5 |
| 4 | 305 | 3874 | 15.5 | 23.8 | 99 | 114 | 14.6 | 19.3 |
| 5 | 304 | 3810 | 16.9 | 26.5 | 104 | 121 | 14.5 | 19.4 |
| 6 | 266 | 2760 | 18.2 | 30.2 | 110 | 128 | 14.4 | 19.2 |
| 7 | 228 | 2690 | 20.1 | 35.5 | 115 | 132 | 14.6 | 21 |
| 8 | 219 | 2549 | 22 | 41.1 | 118 | 138 | 14.8 | 23.2 |
| 9 | 200 | 2144 | 23 | 46 | 122 | 143 | 14.4 | 24.1 |
| 10 | 171 | 1581 | 26 | 48 | 128 | 148 | 14.9 | 23.6 |
| 11 | 161 | 1430 | 27 | 56.3 | 132 | 153 | 15.2 | 25 |
| 12 | 155 | 1945 | 28.9 | 69.5 | 136 | 158 | 15.9 | 28.5 |
| 13 | 154 | 2108 | 34.3 | 80 | 143 | 167 | 16.4 | 30.5 |
| 14 | 160 | 1904 | 40.6 | 96 | 151 | 173 | 16.8 | 33.2 |
| 15 | 170 | 1739 | 42 | 91.8 | 156 | 175 | 17 | 30.7 |
| 16 | 184 | 2110 | 46.6 | 90 | 159 | 176 | 17.6 | 30.8 |
| 17 | 183 | 2078 | 48.6 | 97 | 160 | 177 | 17.4 | 33.2 |
| 18 | 188 | 2457 | 52.5 | 98.3 | 161 | 179 | 18.3 | 31.9 |
| 19 | 127 | 1777 | 55 | 100 | 163 | 177 | 18.7 | 33.4 |
| 20 | 75 | 783 | 51 | 97.2 | 165 | 178 | 18.7 | 31.6 |
| 21 | 27 | 179 | 57 | 81.8 | 164 | 180 | 19.4 | 28.1 |
# percentiles 1-5 year after diagnosis
BMI %>%
filter(age>=0, months>=12, months<48, gender=="FEMALE") %>%
mutate(ageCategory=cut(age, breaks=c(0:30), labels=seq(0,29,1), right=F)) %>%
mutate(yearsAfterDiagnosis=months/12) %>%
mutate(yearsCategory=cut(yearsAfterDiagnosis, breaks=c(0:15), labels=c(1:15))) %>%
group_by(ageCategory) %>%
summarise(Patients=n_distinct(mrn), readings=n(),
lowerWeightPercentile=quantile(weight, 0.10,na.rm=T), upperWeightPercentile=quantile(weight, 0.90,na.rm=T),
lowerHeightPercentile=quantile(height, 0.10,na.rm=T), upperHeightPercentile=quantile(height, 0.90,na.rm=T),
lowerBMIPercentile=quantile(bmi, 0.10,na.rm=T), upperBMIPercentile=quantile(bmi, 0.90,na.rm=T))
| ageCategory | Patients | readings | lowerWeightPercentile | upperWeightPercentile | lowerHeightPercentile | upperHeightPercentile | lowerBMIPercentile | upperBMIPercentile |
|---|---|---|---|---|---|---|---|---|
| 1 | 102 | 936 | 9.5 | 14.9 | 76 | 91 | 15.2 | 19.1 |
| 2 | 175 | 1787 | 11.6 | 17.8 | 85 | 99.1 | 15.1 | 19.4 |
| 3 | 247 | 2709 | 13.3 | 19.4 | 91.1 | 105 | 14.7 | 19.1 |
| 4 | 229 | 2664 | 14.9 | 23 | 97 | 113 | 14.4 | 19.1 |
| 5 | 240 | 3146 | 16.5 | 25.2 | 104 | 118 | 14.1 | 19.2 |
| 6 | 204 | 2374 | 18 | 28.8 | 108 | 124 | 14.2 | 19.8 |
| 7 | 171 | 1923 | 20.1 | 35 | 114 | 130 | 14.4 | 21.4 |
| 8 | 141 | 1620 | 20.8 | 38.7 | 119 | 136 | 14 | 22.2 |
| 9 | 119 | 1282 | 24 | 41.1 | 125 | 140 | 15 | 21.8 |
| 10 | 120 | 1296 | 27 | 46 | 128 | 145 | 15.1 | 24.3 |
| 11 | 119 | 1199 | 31.5 | 56.3 | 132 | 152 | 15.7 | 28 |
| 12 | 117 | 1197 | 27 | 60.5 | 133 | 159 | 15.3 | 26 |
| 13 | 128 | 1157 | 36.8 | 66 | 145 | 164 | 16.4 | 26.5 |
| 14 | 131 | 1169 | 38.7 | 65 | 147 | 163 | 16.9 | 26.4 |
| 15 | 134 | 1461 | 40.5 | 69 | 149 | 165 | 17.7 | 27.2 |
| 16 | 125 | 1895 | 39.6 | 71.5 | 149 | 165 | 16.8 | 28.3 |
| 17 | 141 | 1331 | 46 | 81.3 | 151 | 166 | 18.7 | 32.3 |
| 18 | 151 | 1378 | 45.6 | 82 | 153 | 166 | 17.9 | 32.4 |
| 19 | 105 | 1034 | 48 | 85.6 | 153 | 167 | 18.6 | 32.9 |
| 20 | 60 | 513 | 47 | 121 | 154 | 168 | 18.2 | 35.5 |
| 21 | 21 | 85 | 48.1 | 81.5 | 153 | 166 | 20 | 31.5 |
# percentiles 5 and more years after diagnoss
BMI %>%
filter(age>=5, age<20, months>=60) %>%
mutate(ageCategory=cut(age, breaks=c(0:30), labels=seq(0,29,1), right=F)) %>%
mutate(yearsAfterDiagnosis=months/12) %>%
mutate(yearsCategory=cut(yearsAfterDiagnosis, breaks=c(0:15), labels=c(1:15))) %>%
group_by(ageCategory) %>%
summarise(Patients=n_distinct(mrn), readings=n(),
lowerWeightPercentile=quantile(weight, 0.10,na.rm=T), upperWeightPercentile=quantile(weight, 0.90,na.rm=T),
lowerHeightPercentile=quantile(height, 0.10,na.rm=T), upperHeightPercentile=quantile(height, 0.90,na.rm=T),
lowerBMIPercentile=quantile(bmi, 0.10,na.rm=T), upperBMIPercentile=quantile(bmi, 0.90,na.rm=T))
| ageCategory | Patients | readings | lowerWeightPercentile | upperWeightPercentile | lowerHeightPercentile | upperHeightPercentile | lowerBMIPercentile | upperBMIPercentile |
|---|---|---|---|---|---|---|---|---|
| 5 | 93 | 627 | 18 | 24.6 | 106 | 122 | 14.4 | 19.2 |
| 6 | 154 | 783 | 19.2 | 31.9 | 111 | 125 | 14.3 | 21.6 |
| 7 | 230 | 1241 | 20 | 41 | 115 | 132 | 14.1 | 25.5 |
| 8 | 270 | 1272 | 22 | 44.6 | 118 | 138 | 14.7 | 24.7 |
| 9 | 271 | 1007 | 24 | 44.9 | 125 | 142 | 15.1 | 23.9 |
| 10 | 277 | 1251 | 25.7 | 54.3 | 127 | 150 | 14.8 | 26.8 |
| 11 | 257 | 1321 | 29.3 | 71.6 | 131 | 154 | 15.5 | 29 |
| 12 | 248 | 1273 | 31.3 | 64 | 136 | 157 | 16 | 28.7 |
| 13 | 215 | 945 | 34 | 70.4 | 140 | 162 | 16.6 | 28.3 |
| 14 | 184 | 637 | 38 | 74.5 | 138 | 166 | 17.1 | 29.8 |
| 15 | 155 | 819 | 43 | 77.8 | 147 | 169 | 17.6 | 30.4 |
| 16 | 143 | 715 | 44.5 | 85.4 | 149 | 175 | 18.2 | 32 |
| 17 | 147 | 614 | 48 | 109 | 148 | 176 | 18.4 | 32.8 |
| 18 | 134 | 545 | 50.9 | 89.5 | 155 | 178 | 19.4 | 33.7 |
| 19 | 136 | 640 | 40.1 | 84 | 155 | 177 | 16.4 | 29.2 |
explanatory=c("gender", "age_at_diagnosis", "category", "BMI_Categories", "Weight_Categories", "Height_Categories")
explanatory_multi=c("age_at_diagnosis", "category")
dependent = "Surv(OS, dead)"
BMI_at_diagnosis %>% finalfit(dependent, explanatory, explanatory_multi) %>% gt()
| Dependent: Surv(OS, dead) | all | HR (univariable) | HR (multivariable) | |
|---|---|---|---|---|
| gender | MALE | 2086 (56.7) | - | - |
| FEMALE | 1590 (43.3) | 0.99 (0.85-1.15, p=0.889) | - | |
| age_at_diagnosis | Mean (SD) | 7.0 (5.4) | 1.02 (1.00-1.03, p=0.009) | 1.02 (1.01-1.04, p=0.002) |
| category | Solid | 1042 (28.3) | - | - |
| Leukemia | 1035 (28.2) | 1.11 (0.90-1.37, p=0.325) | 1.06 (0.85-1.31, p=0.613) | |
| Lymphoma | 611 (16.6) | 0.40 (0.29-0.57, p<0.001) | 0.35 (0.24-0.49, p<0.001) | |
| CNS | 540 (14.7) | 2.01 (1.62-2.50, p<0.001) | 1.90 (1.52-2.37, p<0.001) | |
| Bone | 242 (6.6) | 2.34 (1.80-3.05, p<0.001) | 1.96 (1.48-2.61, p<0.001) | |
| STS | 206 (5.6) | 2.26 (1.70-3.01, p<0.001) | 2.11 (1.58-2.81, p<0.001) | |
| BMI_Categories | Healthy weight | 1186 (62.8) | - | - |
| Obese | 260 (13.8) | 0.82 (0.59-1.14, p=0.234) | - | |
| Overweight | 197 (10.4) | 1.02 (0.73-1.44, p=0.888) | - | |
| Underweight | 245 (13.0) | 1.20 (0.90-1.61, p=0.215) | - | |
| Weight_Categories | Increased Weight | 295 (8.0) | - | - |
| Low weight | 246 (6.7) | 1.26 (0.87-1.83, p=0.220) | - | |
| Normal | 3135 (85.3) | 0.99 (0.75-1.31, p=0.946) | - | |
| Height_Categories | Normal | 3297 (89.7) | - | - |
| Short | 263 (7.2) | 1.22 (0.93-1.59, p=0.149) | - | |
| Tall | 116 (3.2) | 1.04 (0.68-1.59, p=0.859) | - |