# Load required libraries
library(tidyverse)
library(readxl)
library(knitr)
library(kableExtra)
library(gtsummary)
library(patchwork)
## Original metabolomics_dataset values:
##
## No Yes <NA>
## 3631 228 0
## Data loaded successfully!
## Total participants: 3859
## Group distribution:
##
## Excluded Included <NA>
## 3631 228 0
Overview
This report assesses the representativeness of the selected
Metabolomics dataset cohort relative to the overall
cohort (hereafter referred to as the “Metabolomics” and “overall”
cohorts).
Tables and plots are presented to assess the frequency, distribution,
etc. for each variable.
Assess representativeness according to demographic/baseline variable
set: - Outlined in the “Variable Category” column of the data dictionary
sheet in the data set.
Summary of Results
We find the distributions for most “primary” variables broadly
similar between the Metabolomics and overall cohorts, with some notable
differences:
Primary Variables: - Maternal age at
birth differs significantly (p<0.001), with the Metabolomics
cohort being slightly older (mean 32.9 vs 31.8 years) - DASS
domains show significant differences, with the Metabolomics
cohort having better mental health scores at 36 weeks (p=0.049) - Other
demographic variables (gender, birth weight/length, BMI, diabetes,
asthma, etc.) are well-balanced
Sample Availability: - The Metabolomics cohort shows
significantly higher engagement across all
questionnaire measures (all p<0.001) - Much higher completion rates
for ASQ questionnaires at all time points - Greater participation in
follow-up assessments
Key Differences: - Twin births are
significantly over-represented in the Metabolomics cohort (8.3% vs 3.2%,
p<0.001) - Child age differs significantly, with
Metabolomics children being older (mean 4.6 vs 3.8 years, p<0.001) -
BMI at 3 years is significantly lower in the
Metabolomics cohort (15.5 vs 15.9, p=0.006) - 3-year
asthma is significantly higher in the Metabolomics cohort (3.1%
vs 0.6%, p=0.009) - Previous pregnancies are
significantly higher in the Metabolomics cohort (p=0.039)
Data & Methods
- DEIDENTIFIED Full Participant list Perron - Final dataset
July 2025.xlsx
- Contains data and data dictionary.
- After some cleaning and the assignment of variable names, we get the
following dimensions (rows, columns):
dim(dat)
## [1] 3859 88
We assess the variable similarity between the Metabolomics cohort (N =
228) and the overall cohort (N = 3859) using:
- Summary tabulations
- Distributional plots
- Simple statistical tests
- Note: the p-values presented should be interpreted with a grain of
salt. With large sample sizes, differences that are not meaningful in
practice can still return “significant” p-values.
Variable Breakdown
There are ~79 candidate variables that can be used to assess the
similarity between the Metabolomics and overall cohorts. - These
variables are broadly “sample availability”, “child/maternal
demographic”, “child/maternal characteristics”.
Firstly, let’s select just a handful of candidate variables with the aim
of getting an overall “snapshot” of the similarity between the
sub-cohort and overall cohort. - For example, we want to ensure the
sub-cohort is not entirely female, born in a single year, of a single
ethnic origin, etc.
“Primary” variables (amended to include the 19 variables
outlined) - Gender of child - Maternal age at birth - Maternal
pre-pregnancy weight - Maternal pre-pregnancy height - Maternal
pre-pregnancy BMI - Infant weight - Infant length - Infant BMI at birth
- Infant ethnic origin - Indigenous status of baby - Vaginal or
C-section birth - Maternal gestational diabetes status - Maternal Type 1
or Type 2 Diabetes - Maternal mental health diagnosis (Depression,
Anxiety disorder, Bipolar, Schizophrenia, OCD, Anorexia Nervosa,
Specific Phobias, Behavioural Disorders) - Individual disorder
breakdown: Each mental health condition is now analyzed
separately, allowing participants with multiple conditions to be counted
in each relevant category - Any mental health
diagnosis: Overall indicator of any mental health condition -
Depression: Depressive disorders -
Anxiety: Anxiety disorders
- Bipolar: Bipolar affective disorder -
Schizophrenia: Schizophrenia spectrum disorders -
OCD: Obsessive-compulsive disorder -
Anorexia: Anorexia nervosa - Phobias:
Specific phobias - Behavioural: Behavioural disorders -
Maternal Asthma - Number of 18wk DASS domains with “Severe” or
“Extremely Severe” - Number of 18wk DASS domains with “Normal” - Number
of 36wk DASS domains with “Severe” or “Extremely Severe” - Number of
36wk DASS domains with “Normal”
Sample availability variables - Availability of
maternal/child urine/blood/stool samples (20 weeks, 2 months, 6 months,
12 months, 3 years) - ASQ completion (4 month, 9 month, 1 year, 3 year,
5 year) - Early Connors assigned and completed - REDCap questionnaires
assigned and completed - Availability of MNS data - Total questionnaires
completed
Outcome variables - 1yr child wheeze - 1 year BMI -
1 year Ferritin results - 1 year count of positive SPT wheals - 1 year
any positive food SPT wheals - 1 year any positive airborne/enviro SPT
wheals - 3 year count of positive SPT wheals - 3 year any positive food
SPT wheals - 3 year any positive airborne/enviro SPT wheals - 3 year BMI
- 3 year wheeze - 3 year asthma - 3 year ferritin - 5 year BMI - 5 year
asthma - 5 year Ferritin - 5 year any positive food SPT wheals - 5 year
any positive airborne/enviro SPT wheals - 3 year count of Connors
domains equal to, or above 65 - 3 year count of other clinical
indicators parent reported as “3” highest
Remaining variables - Variables not contained in the
primary nor sample availability variable set.
1) Primary Variables
Broadly, the distribution of the “primary” variable set is similar
between the Metabolomics and overall cohorts.
Notes - Child sex is well-balanced
between groups (45.6% vs 47.9% female, p=0.517) - Maternal age
at birth differs significantly, with the Metabolomics cohort
being older (mean 32.9 vs 31.8 years, p<0.001) - Maternal
pre-pregnancy characteristics (weight, height, BMI) are
well-balanced between groups (all p>0.69) - Infant birth
characteristics (weight, length, BMI) show no significant
differences (all p>0.11) - Ethnic origin and indigenous
status are well-balanced (p=0.407 and p=1.000 respectively) -
Birth type and maternal gestational diabetes show no significant
differences (p=0.242 and p=0.912) - Mental health diagnosis
patterns: - Overall mental health diagnoses are similar between
groups (11.4% vs 14.5%, p=0.198) - Individual disorders show similar
distributions across both cohorts - No participants in the Metabolomics
cohort had bipolar disorder, schizophrenia, OCD, anorexia, phobias, or
behavioural disorders - Maternal asthma shows no
significant difference (5.7% vs 8.1%, p=0.223) - DASS mental
health scores: - 18-week assessments show the Metabolomics
cohort trends toward better mental health (more “Normal” scores,
p=0.074) - 36-week assessments show significantly more “Normal”
scores in the Metabolomics cohort (p=0.049) - Generally fewer
severe mental health symptoms in the Metabolomics group
# Primary variables, stored as (column name, display label) pairs.
# Despite the object's name, the list mixes numeric and categorical
# variables; the loop below chooses the analysis for each one.
primary_continuous <- list(
  list("gender_child", "Child sex assigned at birth"),
  list("maternal_age_birth", "Maternal Age (Birth)"),
  list("maternal_prepreg_weight", "Maternal pre-pregnancy weight"),
  list("maternal_prepreg_height", "Maternal Pre-pregnancy height"),
  list("maternal_prepreg_bmi_calc", "Maternal Pre-pregnancy BMI (Calc.)"),
  list("infant_weight", "Infant Weight"),
  list("infant_length", "Infant Length at birth"),
  list("infant_bmi_calc", "Infant BMI at birth (Calc.)"),
  list("ethnic_origin", "Ethnic Origin"),
  list("indigenous_status", "Indigenous Status of Baby"),
  list("birth_type_derived", "Vaginal or C section birth (Deriv.)"),
  list(
    "maternal_gest_diabetes_derived",
    "Maternal Gestational Diabetes? (Deriv.)"
  ),
  list(
    "maternal_diabetes_t1t2_derived",
    "Maternal Type 1 or Type 2 Diabetes? (Deriv.)"
  ),
  list("mh_any", "Any Maternal Mental Health Diagnosis"),
  list("mh_depression", "Maternal Depression"),
  list("mh_anxiety", "Maternal Anxiety Disorder"),
  list("mh_bipolar", "Maternal Bipolar Disorder"),
  list("mh_schizophrenia", "Maternal Schizophrenia"),
  list("mh_ocd", "Maternal OCD"),
  list("mh_anorexia", "Maternal Anorexia"),
  list("mh_phobias", "Maternal Specific Phobias"),
  list("mh_behavioural", "Maternal Behavioural Disorders"),
  list("maternal_asthma_derived", "Maternal Asthma? (Deriv.)"),
  list(
    "dass21_18w_severe_count",
    "Number of 18wk DASS domains with \"Severe\" or \"Extremely Severe\""
  ),
  list(
    "dass21_18w_normal_count",
    "Number of 18wk DASS domains with \"Normal\""
  ),
  list(
    "dass21_36w_severe_count",
    "Number of 36wk DASS domains with \"Severe\" or \"Extremely Severe\""
  ),
  list(
    "dass21_36w_normal_count",
    "Number of 36wk DASS domains with \"Normal\""
  )
)

# Emit one summary per variable: columns not listed in `numeric_vars` go to
# the categorical analysis, everything else to the continuous analysis.
for (var_info in primary_continuous) {
  var_name <- var_info[[1]]
  title <- var_info[[2]]
  if (!(var_name %in% numeric_vars)) {
    analyze_categorical(dat, var_name, title)
  } else {
    analyze_continuous(dat, var_name, title)
  }
}
Child sex assigned at birth
P-value: 0.517 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Female
|
104 (45.6%)
|
1849 (47.9%)
|
|
Male
|
124 (54.4%)
|
2010 (52.1%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Maternal Age (Birth)
P-value: <0.001 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
228
|
32.9 (24, 44)
|
32 (30, 35)
|
|
Overall
|
3859
|
31.8 (17, 50)
|
32 (29, 35)
|
Unknown: Included = 0 , Overall = 0
Maternal pre-pregnancy weight
P-value: 0.718 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
216
|
69.7 (47, 128)
|
67 (60, 75.2)
|
|
Overall
|
3633
|
70.5 (38, 134)
|
68 (60, 79)
|
Unknown: Included = 12 , Overall = 226
Maternal Pre-pregnancy height
P-value: 0.714 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
219
|
1.7 (1.5, 1.8)
|
1.7 (1.6, 1.7)
|
|
Overall
|
3698
|
1.7 (1.4, 1.9)
|
1.6 (1.6, 1.7)
|
Unknown: Included = 9 , Overall = 161
Maternal Pre-pregnancy BMI (Calc.)
P-value: 0.693 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
216
|
25.5 (17.3, 42.3)
|
24.2 (21.9, 28.7)
|
|
Overall
|
3623
|
25.7 (14.7, 47.3)
|
24.7 (21.9, 28.6)
|
Unknown: Included = 12 , Overall = 236
Infant Weight
P-value: 0.380 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
209
|
3331.1 (1480, 4500)
|
3360 (3070, 3655)
|
|
Overall
|
3578
|
3369.2 (1095, 5410)
|
3390 (3062.8, 3695)
|
Unknown: Included = 19 , Overall = 281
Infant Length at birth
P-value: 0.112 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
209
|
50.1 (39, 57)
|
50 (48, 52)
|
|
Overall
|
3574
|
50.3 (31, 60)
|
50 (49, 52)
|
Unknown: Included = 19 , Overall = 285
Infant BMI at birth (Calc.)
P-value: 0.976 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
209
|
13.2 (7.6, 17.6)
|
13.2 (12.4, 14.1)
|
|
Overall
|
3574
|
13.3 (7.6, 32.3)
|
13.2 (12.3, 14.2)
|
Unknown: Included = 19 , Overall = 285
Ethnic Origin
P-value: 0.407 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
19 (8.3%)
|
285 (7.4%)
|
|
1
|
178 (78.1%)
|
2951 (76.5%)
|
|
3
|
8 (3.5%)
|
177 (4.6%)
|
|
4
|
2 (0.9%)
|
116 (3%)
|
|
5
|
2 (0.9%)
|
18 (0.5%)
|
|
8
|
19 (8.3%)
|
288 (7.5%)
|
|
10
|
0 (0%)
|
8 (0.2%)
|
|
6
|
0 (0%)
|
4 (0.1%)
|
|
7
|
0 (0%)
|
12 (0.3%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Indigenous Status of Baby
P-value: 1.000 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
19 (8.3%)
|
283 (7.3%)
|
|
4
|
209 (91.7%)
|
3563 (92.3%)
|
|
1
|
0 (0%)
|
12 (0.3%)
|
|
2
|
0 (0%)
|
1 (0%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Vaginal or C section birth (Deriv.)
P-value: 0.242 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
19 (8.3%)
|
281 (7.3%)
|
|
Caesarean Elective
|
67 (29.4%)
|
964 (25%)
|
|
Caesarean Emergency
|
44 (19.3%)
|
804 (20.8%)
|
|
Vaginal
|
98 (43%)
|
1810 (46.9%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Maternal Gestational Diabetes? (Deriv.)
P-value: 0.912 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
19 (8.3%)
|
281 (7.3%)
|
|
FALSE
|
192 (84.2%)
|
3271 (84.8%)
|
|
TRUE
|
17 (7.5%)
|
307 (8%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Maternal Type 1 or Type 2 Diabetes? (Deriv.)
P-value: 1.000 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
19 (8.3%)
|
281 (7.3%)
|
|
FALSE
|
209 (91.7%)
|
3572 (92.6%)
|
|
TRUE
|
0 (0%)
|
6 (0.2%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Any Maternal Mental Health Diagnosis
P-value: 0.198 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
202 (88.6%)
|
3298 (85.5%)
|
|
TRUE
|
26 (11.4%)
|
561 (14.5%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Maternal Depression
P-value: 0.551 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
213 (93.4%)
|
3557 (92.2%)
|
|
TRUE
|
15 (6.6%)
|
302 (7.8%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Maternal Anxiety Disorder
P-value: 0.159 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
210 (92.1%)
|
3437 (89.1%)
|
|
TRUE
|
18 (7.9%)
|
422 (10.9%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Maternal Bipolar Disorder
P-value: 1.000 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
228 (100%)
|
3845 (99.6%)
|
|
TRUE
|
0 (0%)
|
14 (0.4%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Maternal Schizophrenia
P-value: NA
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
228 (100%)
|
3859 (100%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Maternal OCD
P-value: NA
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
228 (100%)
|
3859 (100%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Maternal Anorexia
P-value: 0.620 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
228 (100%)
|
3843 (99.6%)
|
|
TRUE
|
0 (0%)
|
16 (0.4%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Maternal Specific Phobias
P-value: NA
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
228 (100%)
|
3859 (100%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Maternal Behavioural Disorders
P-value: 1.000 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
228 (100%)
|
3849 (99.7%)
|
|
TRUE
|
0 (0%)
|
10 (0.3%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Maternal Asthma? (Deriv.)
P-value: 0.223 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
19 (8.3%)
|
281 (7.3%)
|
|
FALSE
|
196 (86%)
|
3264 (84.6%)
|
|
TRUE
|
13 (5.7%)
|
314 (8.1%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Number of 18wk DASS domains with “Severe” or “Extremely Severe”
P-value: 0.102 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
196
|
0.1 (0, 2)
|
0 (0, 0)
|
|
Overall
|
2781
|
0.1 (0, 3)
|
0 (0, 0)
|
Unknown: Included = 32 , Overall = 1078
Number of 18wk DASS domains with “Normal”
P-value: 0.074 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
196
|
2.7 (0, 3)
|
3 (3, 3)
|
|
Overall
|
2781
|
2.6 (0, 3)
|
3 (2, 3)
|
Unknown: Included = 32 , Overall = 1078
Number of 36wk DASS domains with “Severe” or “Extremely Severe”
P-value: 0.235 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
127
|
0 (0, 3)
|
0 (0, 0)
|
|
Overall
|
1570
|
0.1 (0, 3)
|
0 (0, 0)
|
Unknown: Included = 101 , Overall = 2289
Number of 36wk DASS domains with “Normal”
P-value: 0.049 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
127
|
2.8 (0, 3)
|
3 (3, 3)
|
|
Overall
|
1570
|
2.6 (0, 3)
|
3 (3, 3)
|
Unknown: Included = 101 , Overall = 2289
2) Sample Availability
In general, sample availability and engagement in the
Metabolomics cohort are substantially higher relative to the
overall cohort, with significant differences across multiple
measures.
# Sample availability / engagement variables, stored as
# (column name, display label) pairs.
sample_vars <- list(
  list("mns_data_available", "MNS Data Available?"),
  list("asq_assigned", "ASQ Questionnaires Assigned"),
  list("asq_completed", "ASQ Questionnaires Completed"),
  list("early_connors_assigned", "Early Connors Assigned"),
  list("early_connors_completed", "Early Connors Completed"),
  list("aes_assigned", "AES Questionnaires Assigned"),
  list("aes_completed", "AES Questionnaires Completed"),
  list("redcap_assigned", "RedCap Questionnaires Assigned"),
  list("redcap_completed", "RedCap Questionnaires completed"),
  list("questionnaires_total_completed", "Total Questionnaires Completed"),
  list("asq_4m_completed", "ASQ 4 Month Completed"),
  list("asq_4m_paed_review", "ASQ 4 Month Review with Paediatrician"),
  list("asq_9m_completed", "ASQ 9 Month Completed"),
  list("asq_9m_paed_review", "ASQ 9 Month Review with Paediatrician"),
  list("asq_1yr_completed", "ASQ 1 Year Completed"),
  list("asq_1yr_paed_review", "ASQ 1 Year Review with Paediatrician"),
  list("asq_3yr_completed", "ASQ 3 Year Completed"),
  list("asq_3yr_paed_review", "ASQ 3 Year Review with Paediatrician"),
  list("asq_5yr_completed", "ASQ 5 Year Completed"),
  list("asq_5yr_paed_review", "ASQ 5 Year Review with Paediatrician"),
  list(
    "asq_paed_review_count",
    "Number of times ASQ has prompted review with PAED"
  )
)

# Emit one summary per variable: columns not listed in `numeric_vars` go to
# the categorical analysis, everything else to the continuous analysis.
for (var_info in sample_vars) {
  var_name <- var_info[[1]]
  title <- var_info[[2]]
  if (!(var_name %in% numeric_vars)) {
    analyze_categorical(dat, var_name, title)
  } else {
    analyze_continuous(dat, var_name, title)
  }
}
MNS Data Available?
P-value: 0.618 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
19 (8.3%)
|
281 (7.3%)
|
|
TRUE
|
209 (91.7%)
|
3578 (92.7%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
ASQ Questionnaires Assigned
P-value: <0.001 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
228
|
6 (3, 8)
|
6 (5, 8)
|
|
Overall
|
3591
|
5.1 (0, 8)
|
5 (4, 6)
|
Unknown: Included = 0 , Overall = 268
ASQ Questionnaires Completed
P-value: <0.001 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
228
|
4.3 (0, 9)
|
4 (3, 5)
|
|
Overall
|
3591
|
2.6 (0, 9)
|
2 (1, 4)
|
Unknown: Included = 0 , Overall = 268
Early Connors Assigned
P-value: <0.001 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
228
|
1.2 (0, 2)
|
1 (1, 2)
|
|
Overall
|
3591
|
0.8 (0, 2)
|
1 (0, 1)
|
Unknown: Included = 0 , Overall = 268
Early Connors Completed
P-value: <0.001 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
228
|
0.8 (0, 2)
|
1 (0, 1)
|
|
Overall
|
3591
|
0.4 (0, 2)
|
0 (0, 1)
|
Unknown: Included = 0 , Overall = 268
AES Questionnaires Assigned
P-value: <0.001 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
228
|
3.9 (1, 5)
|
4 (4, 4)
|
|
Overall
|
3591
|
3.4 (0, 5)
|
3 (3, 4)
|
Unknown: Included = 0 , Overall = 268
AES Questionnaires Completed
P-value: <0.001 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
228
|
1.9 (0, 5)
|
2 (1, 3)
|
|
Overall
|
3591
|
1.1 (0, 5)
|
1 (0, 2)
|
Unknown: Included = 0 , Overall = 268
RedCap Questionnaires Assigned
P-value: <0.001 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
228
|
5.9 (2, 7)
|
7 (5, 7)
|
|
Overall
|
3591
|
5.1 (1, 7)
|
5 (4, 7)
|
Unknown: Included = 0 , Overall = 268
RedCap Questionnaires completed
P-value: <0.001 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
228
|
4.1 (0, 7)
|
5 (3, 6)
|
|
Overall
|
3591
|
2.8 (0, 7)
|
3 (1, 4)
|
Unknown: Included = 0 , Overall = 268
Total Questionnaires Completed
P-value: <0.001 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
228
|
11 (0, 22)
|
11 (8, 14)
|
|
Overall
|
3859
|
6.3 (0, 22)
|
6 (2, 10)
|
Unknown: Included = 0 , Overall = 0
ASQ 4 Month Completed
P-value: <0.001 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
70 (30.7%)
|
2004 (51.9%)
|
|
TRUE
|
158 (69.3%)
|
1855 (48.1%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
ASQ 4 Month Review with Paediatrician
P-value: 0.044 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
70 (30.7%)
|
2004 (51.9%)
|
|
FALSE
|
88 (38.6%)
|
885 (22.9%)
|
|
TRUE
|
70 (30.7%)
|
970 (25.1%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
ASQ 9 Month Completed
P-value: <0.001 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
60 (26.3%)
|
2141 (55.5%)
|
|
TRUE
|
168 (73.7%)
|
1718 (44.5%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
ASQ 9 Month Review with Paediatrician
P-value: <0.001 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
60 (26.3%)
|
2141 (55.5%)
|
|
FALSE
|
100 (43.9%)
|
807 (20.9%)
|
|
TRUE
|
68 (29.8%)
|
911 (23.6%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
ASQ 1 Year Completed
P-value: <0.001 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
20 (8.8%)
|
1327 (34.4%)
|
|
TRUE
|
208 (91.2%)
|
2532 (65.6%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
ASQ 1 Year Review with Paediatrician
P-value: 0.003 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
20 (8.8%)
|
1327 (34.4%)
|
|
FALSE
|
127 (55.7%)
|
1293 (33.5%)
|
|
TRUE
|
81 (35.5%)
|
1239 (32.1%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
ASQ 3 Year Completed
P-value: <0.001 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
91 (39.9%)
|
2684 (69.6%)
|
|
TRUE
|
137 (60.1%)
|
1175 (30.4%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
ASQ 3 Year Review with Paediatrician
P-value: 0.458 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
91 (39.9%)
|
2684 (69.6%)
|
|
FALSE
|
48 (21.1%)
|
450 (11.7%)
|
|
TRUE
|
89 (39%)
|
725 (18.8%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
ASQ 5 Year Completed
P-value: <0.001 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
FALSE
|
182 (79.8%)
|
3611 (93.6%)
|
|
TRUE
|
46 (20.2%)
|
248 (6.4%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
ASQ 5 Year Review with Paediatrician
P-value: 0.958 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
182 (79.8%)
|
3611 (93.6%)
|
|
FALSE
|
14 (6.1%)
|
72 (1.9%)
|
|
TRUE
|
32 (14%)
|
176 (4.6%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Number of times ASQ has prompted review with PAED
P-value: 0.011 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
225
|
1.5 (0, 4)
|
1 (1, 2)
|
|
Overall
|
3014
|
1.3 (0, 4)
|
1 (1, 2)
|
Unknown: Included = 3 , Overall = 845
3) Outcome Variables
Notes - Child wheeze at 1 year: No
significant difference between groups (16.7% vs 14.0% of all
participants, i.e. 17.9% vs 21.3% of those with a recorded response;
p=0.243) -
BMI measurements: - 1-year BMI shows no significant
difference (p=0.199) - 3-year BMI is significantly
lower in the Metabolomics cohort (15.5 vs 15.9, p=0.006) -
5-year BMI shows no significant difference (p=0.256) - Wheeze
and asthma outcomes: - 1-year and 3-year wheeze show no
significant differences - 3-year asthma is significantly
higher in the Metabolomics cohort (3.1% vs 0.6% of all participants;
5.0% vs 1.8% of those with a recorded response; p=0.009) -
5-year asthma shows no significant difference - Skin prick test
(SPT) results: - Generally similar patterns between groups
across all time points - Slight trend toward more positive airborne
results in Metabolomics cohort at 3 years (p=0.074) - No significant
differences in food allergies at any time point - Ferritin
levels: No significant differences at 1 year (p=0.871), 3 years
(p=0.805), or 5 years (p=0.883) - Behavioral
assessments: - Several ASQ paediatric review measures show
significant differences (4-month p=0.044, 9-month p<0.001, 1-year p=0.003) - ASQ
paediatric review count is significantly higher (p=0.011) - Connors
domain scores and clinical indicators show no significant differences -
Follow-up participation: The Metabolomics cohort shows
much higher engagement in data collection - Overall
assessment: Most clinical outcome variables demonstrate good
representativeness, with the main differences being consistently higher
study engagement and some specific clinical outcomes (lower BMI, higher
asthma rate at 3 years)
# Outcome variables, stored as (column name, display label) pairs.
# NOTE(review): the first label reads "had a wheezed"; it is reproduced
# as-is here -- confirm whether it mirrors the data dictionary wording.
# NOTE(review): column `followup_3yr` carries the label "3yr ferritin",
# while the 1- and 5-year ferritin columns are named `ferritin_1yr` and
# `ferritin_5yr` -- confirm this is the intended column.
outcome_vars <- list(
  list("child_wheeze_1yr", "Has your child ever had a wheezed at 1 Year?"),
  list("bmi_1yr_calc", "BMI at 1 Year (Calc.)"),
  list("ferritin_1yr", "Ferritin Results at 1 Year"),
  list(
    "spt_positive_count_1yr",
    "Count of positive SPT wheals(>= 3MM WHEAL) at 1 Year"
  ),
  list(
    "spt_food_positive_1yr",
    "Any positive Food SPT wheals (>=3mm) at 1 Year"
  ),
  list(
    "spt_airborne_positive_1yr",
    "Any positive airborne/enviro SPT wheals (>=3mm) at 1 Year"
  ),
  list(
    "spt_positive_count_3yr",
    "Count of positive SPT wheals(>= 3MM WHEAL) at 3 Years"
  ),
  list(
    "spt_food_positive_3yr",
    "Any positive Food SPT wheals (>=3mm) at 3 Years"
  ),
  list(
    "spt_airborne_positive_3yr",
    "Any positive airborne/enviro SPT wheals (>=3mm) at 3 Years"
  ),
  list("bmi_3yr_calc", "BMI at 3 Years (Calc.)"),
  list("wheeze_3yr", "3 year wheeze"),
  list("asthma_3yr", "3 year asthma"),
  list("followup_3yr", "3yr ferritin"),
  list("bmi_5yr_calc", "BMI at 5 Years (Calc.)"),
  list("asthma_5yr", "5 year asthma"),
  list("ferritin_5yr", "5yr ferritin"),
  list(
    "spt_positive_count_5yr",
    "Count of positive SPT wheals (>=3mm) at 5 Years"
  ),
  list(
    "spt_food_positive_5yr",
    "Any positive Food SPT wheals (>=3mm) at 5 Years"
  ),
  list(
    "spt_airborne_positive_5yr",
    "Any positive airborne/enviro SPT wheals (>=3mm) at 5 Years"
  ),
  list(
    "connors_domains_above65_3yr",
    "Count of Connors domains equal to, or above 65 at 3 Years"
  ),
  list(
    "clinical_indicators_highest_3yr",
    "Count of other clinical indicators parent reported as \"3\" highest at 3 Years"
  )
)

# Emit one summary per variable: columns not listed in `numeric_vars` go to
# the categorical analysis, everything else to the continuous analysis.
for (var_info in outcome_vars) {
  var_name <- var_info[[1]]
  title <- var_info[[2]]
  if (!(var_name %in% numeric_vars)) {
    analyze_categorical(dat, var_name, title)
  } else {
    analyze_continuous(dat, var_name, title)
  }
}
Has your child ever had a wheezed at 1 Year?
P-value: 0.243 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
16 (7%)
|
1315 (34.1%)
|
|
No
|
174 (76.3%)
|
2002 (51.9%)
|
|
Yes
|
38 (16.7%)
|
542 (14%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
BMI at 1 Year (Calc.)
P-value: 0.199 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
170
|
16.9 (13.8, 21.6)
|
16.8 (16, 17.7)
|
|
Overall
|
1832
|
17 (10.2, 26.1)
|
16.9 (16, 18)
|
Unknown: Included = 58 , Overall = 2027
Ferritin Results at 1 Year
P-value: 0.871 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
86
|
32.1 (5, 367)
|
24.5 (16.2, 37.5)
|
|
Overall
|
542
|
32.1 (5, 871)
|
25 (16, 37)
|
Unknown: Included = 142 , Overall = 3317
Count of positive SPT wheals(>= 3MM WHEAL) at 1 Year
P-value: 0.812 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
225
|
0.2 (0, 5)
|
0 (0, 0)
|
|
Overall
|
2383
|
0.2 (0, 8)
|
0 (0, 0)
|
Unknown: Included = 3 , Overall = 1476
Any positive Food SPT wheals (>=3mm) at 1 Year
P-value: 0.824 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
3 (1.3%)
|
1476 (38.2%)
|
|
FALSE
|
203 (89%)
|
2165 (56.1%)
|
|
TRUE
|
22 (9.6%)
|
218 (5.6%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Any positive airborne/enviro SPT wheals (>=3mm) at 1 Year
P-value: 0.363 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
3 (1.3%)
|
1476 (38.2%)
|
|
FALSE
|
222 (97.4%)
|
2325 (60.2%)
|
|
TRUE
|
3 (1.3%)
|
58 (1.5%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Count of positive SPT wheals(>= 3MM WHEAL) at 3 Years
P-value: 0.091 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
131
|
0.2 (0, 3)
|
0 (0, 0)
|
|
Overall
|
1034
|
0.2 (0, 5)
|
0 (0, 0)
|
Unknown: Included = 97 , Overall = 2825
Any positive Food SPT wheals (>=3mm) at 3 Years
P-value: 0.812 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
97 (42.5%)
|
2825 (73.2%)
|
|
FALSE
|
127 (55.7%)
|
992 (25.7%)
|
|
TRUE
|
4 (1.8%)
|
42 (1.1%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Any positive airborne/enviro SPT wheals (>=3mm) at 3 Years
P-value: 0.074 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
97 (42.5%)
|
2825 (73.2%)
|
|
FALSE
|
119 (52.2%)
|
882 (22.9%)
|
|
TRUE
|
12 (5.3%)
|
152 (3.9%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
BMI at 3 Years (Calc.)
P-value: 0.006 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
113
|
15.5 (12.7, 19.3)
|
15.4 (14.7, 16.4)
|
|
Overall
|
1021
|
15.9 (11.3, 27.3)
|
15.8 (15, 16.7)
|
Unknown: Included = 115 , Overall = 2838
3 year wheeze
P-value: 0.835 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
86 (37.7%)
|
2597 (67.3%)
|
|
FALSE
|
108 (47.4%)
|
973 (25.2%)
|
|
TRUE
|
34 (14.9%)
|
289 (7.5%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
3 year asthma
P-value: 0.009 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
87 (38.2%)
|
2603 (67.5%)
|
|
FALSE
|
134 (58.8%)
|
1233 (32%)
|
|
TRUE
|
7 (3.1%)
|
23 (0.6%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
3yr ferritin
P-value: 0.805 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
107
|
21.7 (7, 74)
|
18 (14, 25)
|
|
Overall
|
610
|
22.1 (5, 175)
|
19 (14, 26)
|
Unknown: Included = 121 , Overall = 3249
BMI at 5 Years (Calc.)
P-value: 0.256 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
70
|
15.6 (12.6, 26.4)
|
15.5 (14.7, 16.3)
|
|
Overall
|
466
|
15.8 (11.5, 26.4)
|
15.6 (14.9, 16.5)
|
Unknown: Included = 158 , Overall = 3393
5 year asthma
P-value: 0.648 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
153 (67.1%)
|
3374 (87.4%)
|
|
FALSE
|
70 (30.7%)
|
461 (11.9%)
|
|
TRUE
|
5 (2.2%)
|
24 (0.6%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
5yr ferritin
P-value: 0.883 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
41
|
24.5 (7, 55)
|
21 (17, 30)
|
|
Overall
|
262
|
25.8 (7, 124)
|
22 (17, 30)
|
Unknown: Included = 187 , Overall = 3597
Count of positive SPT wheals (>=3mm) at 5 Years
P-value: 0.580 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
72
|
1 (0, 6)
|
0 (0, 2)
|
|
Overall
|
414
|
1 (0, 10)
|
0 (0, 2)
|
Unknown: Included = 156 , Overall = 3445
Any positive Food SPT wheals (>=3mm) at 5 Years
P-value: 0.601 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
156 (68.4%)
|
3445 (89.3%)
|
|
FALSE
|
66 (28.9%)
|
388 (10.1%)
|
|
TRUE
|
6 (2.6%)
|
26 (0.7%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Any positive airborne/enviro SPT wheals (>=3mm) at 5 Years
P-value: 0.567 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
156 (68.4%)
|
3445 (89.3%)
|
|
FALSE
|
44 (19.3%)
|
268 (6.9%)
|
|
TRUE
|
28 (12.3%)
|
146 (3.8%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Count of Connors domains equal to, or above 65 at 3 Years
P-value: 0.731 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
126
|
0.4 (0, 7)
|
0 (0, 0)
|
|
Overall
|
1062
|
0.4 (0, 7)
|
0 (0, 1)
|
Unknown: Included = 102 , Overall = 2797
Count of other clinical indicators parent reported as “3” highest at
3 Years
Data not available
4) Remaining Variables
Key differences identified in remaining
variables:
- Twin births are significantly over-represented in
the Metabolomics cohort (8.3% vs 3.2%, p<0.001)
- Current child age differs significantly, with
Metabolomics children being older (mean 4.6 vs 3.8 years,
p<0.001)
- Previous pregnancies are significantly higher in
the Metabolomics cohort (mean 1.4 vs 1.2, p=0.039)
- Age at Peapod assessment differs significantly
(mean 7.6 vs 5.1 days, p<0.001)
- DASS stress scores at 36 weeks show a significant
difference (p=0.037), with the Metabolomics cohort having better stress
profiles
- Previous pregnancies parity and BMI at
Peapod show no significant differences
- Other DASS measures show no significant differences, though trends
suggest better mental health in the Metabolomics cohort
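The over-representation of twins, older child age, and higher number
of previous pregnancies in the Metabolomics cohort suggests this
sub-sample represents families with longer study engagement and more
complex pregnancies. Although the number of previous pregnancies is
higher, parity itself does not differ significantly (p=0.211), so these
results do not indicate higher parity.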
# Remaining variables
# Each entry pairs a column name in `dat` (first element) with the heading
# used for that variable's summary (second element).
remaining_vars <- list(
  list("singleton_twin", "Singleton or Twin"),
  list("current_age_march2024", "Current age of child (as of March 2024)"),
  list("previous_pregnancies", "Previous Pregnancies"),
  list("previous_pregnancies_parity", "Previous Pregnancies Parity"),
  list("bmi_peapod_calc", "BMI at Peapod (Calc.)"),
  list("age_days_peapod_calc", "Age (days) at Peapod (Calc.)"),
  list("dass21_18w_depression", "DASS21 Depression 18 Week"),
  list("dass21_18w_anxiety", "DASS21 Anxiety 18 Week"),
  list("dass21_18w_stress", "DASS21 Stress 18 Week"),
  list("dass21_36w_depression", "DASS21 Depression 36 Week"),
  list("dass21_36w_anxiety", "DASS21 Anxiety 36 Week"),
  list("dass21_36w_stress", "DASS21 Stress 36 Week")
)

# Walk the list in order and hand each variable to the matching summary
# routine: columns listed in `numeric_vars` go to analyze_continuous(),
# everything else to analyze_categorical(). Both helpers and `numeric_vars`
# are defined elsewhere in this document.
walk(remaining_vars, function(entry) {
  column <- entry[[1]]
  heading <- entry[[2]]
  if (column %in% numeric_vars) {
    analyze_continuous(dat, column, heading)
  } else {
    analyze_categorical(dat, column, heading)
  }
})
Singleton or Twin
P-value: <0.001 (Chi-squared test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Singleton
|
209 (91.7%)
|
3735 (96.8%)
|
|
Twins
|
19 (8.3%)
|
124 (3.2%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Current age of child (as of March 2024)
P-value: <0.001 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
228
|
4.6 (1.4, 7.2)
|
4.9 (3.2, 5.9)
|
|
Overall
|
3859
|
3.8 (0.6, 7.3)
|
3.7 (2.4, 5.2)
|
Unknown: Included = 0 , Overall = 0
Previous Pregnancies
P-value: 0.039 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
209
|
1.4 (0, 7)
|
1 (0, 2)
|
|
Overall
|
3576
|
1.2 (0, 14)
|
1 (0, 2)
|
Unknown: Included = 19 , Overall = 283
Previous Pregnancies Parity
P-value: 0.211 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
209
|
0.7 (0, 4)
|
1 (0, 1)
|
|
Overall
|
3570
|
0.7 (0, 6)
|
0 (0, 1)
|
Unknown: Included = 19 , Overall = 289
BMI at Peapod (Calc.)
P-value: 0.893 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
186
|
12.8 (10, 17.1)
|
12.6 (11.9, 13.7)
|
|
Overall
|
2210
|
12.8 (8.5, 28.1)
|
12.8 (11.8, 13.8)
|
Unknown: Included = 42 , Overall = 1649
Age (days) at Peapod (Calc.)
P-value: <0.001 (Wilcoxon rank sum test)
|
Characteristic
|
N
|
Mean (Min, Max)
|
Median (Q1, Q3)
|
|
Included
|
186
|
7.6 (0, 56)
|
3 (1, 10)
|
|
Overall
|
2210
|
5.1 (0, 82)
|
2 (1, 4)
|
Unknown: Included = 42 , Overall = 1649
DASS21 Depression 18 Week
P-value: 0.712 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
32 (14%)
|
1078 (27.9%)
|
|
Mild
|
10 (4.4%)
|
153 (4%)
|
|
Moderate
|
6 (2.6%)
|
120 (3.1%)
|
|
Normal
|
179 (78.5%)
|
2460 (63.7%)
|
|
Severe
|
1 (0.4%)
|
24 (0.6%)
|
|
Extremely Severe
|
0 (0%)
|
24 (0.6%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
DASS21 Anxiety 18 Week
P-value: 0.078 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
32 (14%)
|
1078 (27.9%)
|
|
Extremely Severe
|
1 (0.4%)
|
57 (1.5%)
|
|
Mild
|
12 (5.3%)
|
302 (7.8%)
|
|
Moderate
|
10 (4.4%)
|
130 (3.4%)
|
|
Normal
|
169 (74.1%)
|
2224 (57.6%)
|
|
Severe
|
4 (1.8%)
|
68 (1.8%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
DASS21 Stress 18 Week
P-value: 0.585 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
32 (14%)
|
1078 (27.9%)
|
|
Extremely Severe
|
2 (0.9%)
|
23 (0.6%)
|
|
Mild
|
7 (3.1%)
|
155 (4%)
|
|
Moderate
|
6 (2.6%)
|
101 (2.6%)
|
|
Normal
|
179 (78.5%)
|
2447 (63.4%)
|
|
Severe
|
2 (0.9%)
|
55 (1.4%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
DASS21 Depression 36 Week
P-value: 0.571 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
101 (44.3%)
|
2289 (59.3%)
|
|
Extremely Severe
|
1 (0.4%)
|
13 (0.3%)
|
|
Mild
|
6 (2.6%)
|
86 (2.2%)
|
|
Moderate
|
1 (0.4%)
|
48 (1.2%)
|
|
Normal
|
118 (51.8%)
|
1409 (36.5%)
|
|
Severe
|
1 (0.4%)
|
14 (0.4%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
DASS21 Anxiety 36 Week
P-value: 0.804 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
101 (44.3%)
|
2289 (59.3%)
|
|
Extremely Severe
|
1 (0.4%)
|
32 (0.8%)
|
|
Mild
|
10 (4.4%)
|
130 (3.4%)
|
|
Moderate
|
4 (1.8%)
|
72 (1.9%)
|
|
Normal
|
111 (48.7%)
|
1310 (33.9%)
|
|
Severe
|
1 (0.4%)
|
26 (0.7%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
DASS21 Stress 36 Week
P-value: 0.037 (Fisher’s exact test)
|
Characteristic
|
Included N = 228
|
Overall N = 3859
|
|
Unknown
|
101 (44.3%)
|
2289 (59.3%)
|
|
Mild
|
2 (0.9%)
|
71 (1.8%)
|
|
Normal
|
123 (53.9%)
|
1403 (36.4%)
|
|
Severe
|
2 (0.9%)
|
31 (0.8%)
|
|
Extremely Severe
|
0 (0%)
|
12 (0.3%)
|
|
Moderate
|
0 (0%)
|
53 (1.4%)
|
|
Total
|
228 (100.0%)
|
3859 (100.0%)
|
Plotting
# Distribution plots for key continuous variables.
# For every variable one figure is printed with three side-by-side histogram
# panels: Excluded, Included and Overall (all non-missing rows).
plot_vars <- c(
  "maternal_age_birth", "infant_weight", "bmi_1yr_calc", "bmi_3yr_calc",
  "bmi_5yr_calc", "ferritin_1yr", "ferritin_3yr", "ferritin_5yr",
  "dass21_18w_normal_count", "dass21_36w_normal_count"
)

# Descriptive label for each plotted column; used for the title and x axis.
plot_labels <- c(
  maternal_age_birth = "Maternal Age at Birth",
  infant_weight = "Infant Weight at Birth",
  bmi_1yr_calc = "BMI at 1 Year",
  bmi_3yr_calc = "BMI at 3 Years",
  bmi_5yr_calc = "BMI at 5 Years",
  ferritin_1yr = "Ferritin Levels at 1 Year",
  ferritin_3yr = "Ferritin Levels at 3 Years",
  ferritin_5yr = "Ferritin Levels at 5 Years",
  dass21_18w_normal_count = "DASS Normal Domains at 18 Weeks",
  dass21_36w_normal_count = "DASS Normal Domains at 36 Weeks"
)

# Fill colour per panel; the names double as the panel order.
panel_colours <- c(
  "Excluded" = "#E74C3C",
  "Included" = "#3498DB",
  "Overall" = "#2C3E50"
)

for (column in plot_vars) {
  # Skip columns that are absent, non-numeric, or entirely missing.
  plottable <- column %in% names(dat) &&
    is.numeric(dat[[column]]) &&
    !all(is.na(dat[[column]]))
  if (!plottable) {
    next
  }

  # Fall back to the column name with underscores replaced by spaces when no
  # descriptive label is registered.
  if (column %in% names(plot_labels)) {
    label <- plot_labels[[column]]
  } else {
    label <- str_replace_all(column, "_", " ")
  }

  # Rows with an observed value; every panel is drawn from this subset.
  observed <- dat %>%
    filter(!is.na(.data[[column]]))

  # Stack the per-group subsets and the full set, tagging each row with the
  # panel it belongs to. Rows therefore appear twice: once in their own group
  # and once in "Overall".
  panels <- bind_rows(
    observed %>%
      filter(metabolomics_dataset == "Excluded") %>%
      mutate(group_type = "Excluded"),
    observed %>%
      filter(metabolomics_dataset == "Included") %>%
      mutate(group_type = "Included"),
    observed %>%
      mutate(group_type = "Overall")
  ) %>%
    mutate(group_type = factor(group_type, levels = names(panel_colours)))

  # The y axis is free per panel because group sizes differ; x is shared.
  histogram <- ggplot(panels, aes(x = .data[[column]])) +
    geom_histogram(aes(fill = group_type), alpha = 0.7, bins = 30) +
    facet_wrap(~group_type, scales = "free_y", ncol = 3) +
    scale_fill_manual(values = panel_colours) +
    theme_minimal() +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 14, face = "bold"),
      strip.text = element_text(size = 12, face = "bold")
    ) +
    labs(
      title = paste("Distribution of", label),
      x = label,
      y = "Count"
    )
  print(histogram)
}










# Ferritin comparison across time points.
# Reshape the three ferritin columns to long form (one row per participant and
# time point with a non-missing value), then draw histograms in a grid with
# metabolomics group as rows and time point as columns.
ferritin_cols <- c("ferritin_1yr", "ferritin_3yr", "ferritin_5yr")

ferritin_data <- dat %>%
  select(metabolomics_dataset, all_of(ferritin_cols)) %>%
  pivot_longer(
    cols = all_of(ferritin_cols),
    names_to = "time_point",
    values_to = "ferritin_level"
  ) %>%
  filter(!is.na(ferritin_level)) %>%
  mutate(
    # Relabel the column names and fix the chronological panel order.
    time_point = factor(
      time_point,
      levels = ferritin_cols,
      labels = c("1 Year", "3 Years", "5 Years")
    )
  )

# Only plot when at least one ferritin measurement exists.
if (nrow(ferritin_data) > 0) {
  p_ferritin <- ggplot(
    ferritin_data,
    aes(x = ferritin_level, fill = metabolomics_dataset)
  ) +
    geom_histogram(alpha = 0.7, position = "identity", bins = 25) +
    facet_grid(metabolomics_dataset ~ time_point, scales = "free") +
    scale_fill_manual(
      values = c("Excluded" = "#E74C3C", "Included" = "#3498DB")
    ) +
    theme_minimal() +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 14, face = "bold"),
      strip.text = element_text(size = 11, face = "bold")
    ) +
    labs(
      title = "Ferritin Levels Comparison Across Time Points",
      x = "Ferritin Level",
      y = "Count"
    )
  print(p_ferritin)
}

# DASS normal-domain comparison across time points.
# Reshape the 18- and 36-week normal-domain counts to long form, drop missing
# counts, and draw bar charts in a grid with metabolomics group as rows and
# time point as columns.
dass_labels <- c(
  dass21_18w_normal_count = "18 Weeks",
  dass21_36w_normal_count = "36 Weeks"
)

dass_data <- dat %>%
  select(metabolomics_dataset, all_of(names(dass_labels))) %>%
  pivot_longer(
    cols = all_of(names(dass_labels)),
    names_to = "time_point",
    values_to = "normal_count"
  ) %>%
  filter(!is.na(normal_count)) %>%
  # Swap the source column names for display labels (kept as character).
  mutate(time_point = unname(dass_labels[time_point]))

# Only plot when at least one DASS count exists.
if (nrow(dass_data) > 0) {
  p_dass <- ggplot(
    dass_data,
    aes(x = normal_count, fill = metabolomics_dataset)
  ) +
    geom_bar(alpha = 0.7, position = "dodge") +
    facet_grid(metabolomics_dataset ~ time_point) +
    scale_fill_manual(
      values = c("Excluded" = "#E74C3C", "Included" = "#3498DB")
    ) +
    # One tick per possible count of normal domains.
    scale_x_continuous(breaks = 0:3) +
    theme_minimal() +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 14, face = "bold"),
      strip.text = element_text(size = 11, face = "bold")
    ) +
    labs(
      title = "DASS Normal Domains Comparison Across Time Points",
      x = "Number of Normal DASS Domains (0-3)",
      y = "Count"
    )
  print(p_dass)
}

# BMI comparison across time points.
# Reshape the 1-, 3- and 5-year BMI columns to long form (one row per
# participant and time point with a non-missing value), then draw histograms
# in a grid with metabolomics group as rows and time point as columns.
bmi_cols <- c("bmi_1yr_calc", "bmi_3yr_calc", "bmi_5yr_calc")

bmi_data <- dat %>%
  select(metabolomics_dataset, all_of(bmi_cols)) %>%
  pivot_longer(
    cols = all_of(bmi_cols),
    names_to = "time_point",
    values_to = "bmi"
  ) %>%
  filter(!is.na(bmi)) %>%
  mutate(
    # Relabel the column names and fix the chronological panel order.
    time_point = factor(
      time_point,
      levels = bmi_cols,
      labels = c("1 Year", "3 Years", "5 Years")
    )
  )

# Only plot when at least one BMI value exists.
if (nrow(bmi_data) > 0) {
  p_bmi <- ggplot(bmi_data, aes(x = bmi, fill = metabolomics_dataset)) +
    geom_histogram(alpha = 0.7, position = "identity", bins = 25) +
    facet_grid(metabolomics_dataset ~ time_point, scales = "free") +
    scale_fill_manual(
      values = c("Excluded" = "#E74C3C", "Included" = "#3498DB")
    ) +
    theme_minimal() +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 14, face = "bold"),
      strip.text = element_text(size = 11, face = "bold")
    ) +
    labs(
      title = "BMI Comparison Across Time Points",
      x = "BMI",
      y = "Count"
    )
  print(p_bmi)
}
