Cost of illness analysis in individuals with overweight/obesity and chronic low back pain in the Metropolitan area of Bern

Author

Affiliations

Alexander Schurz

Bern University of Applied Sciences, Switzerland

Vrije Universiteit Brussels, Belgium

Bern University, Switzerland

Published

January 22, 2026

1 Load packages

Code

library(pacman)
pacman::p_load(
  tidyverse,
  rio,
  lubridate,
  dplyr,
  tidyr,
  purrr,
  kableExtra,
  pander,
  Hmisc,
  DataExplorer,
  naniar,
  stringr,
  eq5d,
  readxl,
  stringr,
  table1,
  gridExtra,
  ggsci,
  corrplot,
  flextable)

2 Naming conventions

df.xx.yy for data frames, eg. df.qualtrics.raw, df.qualtrics.long mat.xx.yy for matrix list.xx.yy for lists, eg. list.design.choices tbl.xx.yy for tables, eg. tbl.COI.demographics.csv

3 Data import

In a first step, the necessary data are imported. The reports are called “COI Switzerland” and “Demographics Switzerland” in RedCap.
These include:

Demographic data
Medical Consumption Questionnaire (iMCQ) Switzerland, T0
Productivity Cost Questionnaire (iPCQ) Switzerland, T0
EQ-5D-5L Switzerland, T0

Code

# Import raw datasets
df.COI.CH.raw <- rio::import("../data/COI_data.csv") # Cost of illness data (iMCQ, iPCQ, EQ5D5L)
df.COI.demo.raw <- rio::import("../data/COI_demo_data.csv") # Dataset on demographics of participants at T0

# Year of T0 is relevant for cost conversion. Therefore, this variable needs to be extracted from the demographics dataset
# Use only rows with values from df.COI.CH.raw and the variable general_t0_date
# Extract the `general_t0_date` for `t0_baseline_measur_arm_1`
df.t0.date.data <- df.COI.CH.raw %>%
  filter(redcap_event_name == "t0_baseline_measur_arm_1") %>%
  select(general_id, general_t0_date)

# Filter for rows with `t0_health_economic_arm_1` and merge `general_t0_date`
df.COI.filtered <- df.COI.CH.raw %>%
  filter(redcap_event_name == "t0_health_economic_arm_1") %>%
  select(-general_t0_date) %>%
  left_join(df.t0.date.data , by = "general_id")

# Filter df.COI.demo.raw to include only rows with non-NA values in demo_ch_t0_birthyear
df.COI.demo.filtered <- df.COI.demo.raw %>%
  filter(!is.na(demo_ch_t0_birthyear))

4 Coding reference datasets

Code

# reference dataset on how data is coded in the demographics dataset.
df.demographics.coding <- rio::import("../coding/demographics_instrument_coding.csv")

5 Recalculation of costs

All costs are recalculated to report them in 2025 Swiss Francs or 2025 Belgium Euros.

We use the IMF dataset with the GDP deflator index and the PPP value for the different countries for the cost recalculation (Shemlit paper on CCEMG cost converter).

Code

df.IMF.conversion.raw <- rio::import("../reference data/IMF_cost_conversion_2025.csv", header = TRUE)

df.IMF.conversion.raw <- df.IMF.conversion.raw %>% 
  mutate(across(c("2023", "2024", "2025"), as.numeric))

# Subset for Belgium and Switzerland for years 2023, 2024, 2025
df.IMF.conversion.adapted <- df.IMF.conversion.raw %>%
  filter(
    ISO %in% c("BEL", "CHE") & 
    `WEO Subject Code` %in% c("NGDP_D", "PPPEX") & 
    (!is.na(`2023`) | !is.na(`2024`) | !is.na(`2025`))  # Keep rows where at least one year has a value
  ) %>%
  select(Country, ISO, `WEO Subject Code`, `Subject Descriptor`, `2023`, `2024`, `2025`)

Code

# Subset data for Switzerland
df.IMF.con.CH <- df.IMF.conversion.adapted %>% 
  filter(Country == "Switzerland", `WEO Subject Code`%in% c("NGDP_D", "PPPEX")) %>% 
  select(`WEO Subject Code`, `2023`, `2024`, `2025`) %>% 
  pivot_longer(cols = `2023`:`2025`, names_to = "Year", values_to = "Value") %>% 
  pivot_wider(names_from = `WEO Subject Code`, values_from = "Value") %>% 
  rename(GDP = NGDP_D, PPP = PPPEX) %>% 
  mutate(Year = as.numeric(Year))
  
# Subset data for Belgium
df.IMF.con.BE <- df.IMF.conversion.adapted %>% 
  filter(Country == "Belgium", `WEO Subject Code`%in% c("NGDP_D", "PPPEX")) %>% 
  select(`WEO Subject Code`, `2023`, `2024`, `2025`) %>% 
  pivot_longer(cols = `2023`:`2025`, names_to = "Year", values_to = "Value") %>% 
  pivot_wider(names_from = `WEO Subject Code`, values_from = "Value") %>% 
  rename(GDP = NGDP_D, PPP = PPPEX) %>% 
  mutate(Year = as.numeric(Year))

# Function for inflation (Important variables: "Year T0" und "Price (CHF)")
# Funktion zur Preisberechnung
convert_price_to_2025 <- function(price_chf, year_t0) {
  # Ensure year_t0 is numeric
  year_t0 <- as.numeric(year_t0)

  # GDP-Values for relevant years
  gdp_values <- df.IMF.con.CH %>%
    filter(Year %in% c(2025, unique(year_t0))) %>%
    select(Year, GDP) %>%
    pivot_wider(names_from = Year, values_from = GDP)

  # GDP values for 2025
  gdp_2025 <- gdp_values[[as.character(2025)]]

  # GDP values for year_t0
  gdp_year_t0 <- gdp_values[match(year_t0, colnames(gdp_values))]

  # If there are no GDP values, add NA
  gdp_ratio <- ifelse(!is.na(gdp_year_t0), gdp_2025 / gdp_year_t0, NA)

  # Adapt price
  price_2025 <- as.numeric(price_chf) * as.numeric(gdp_ratio)

  return(price_2025)
}

Extracting the PPP values for Belgium and Switzerland:

Code

# Dataset for conversion from 2025 CHF to 2025 Belgium Euro
# Extract PPP Value for 2025 for Belgium 
PPP_BE_2025 <- df.IMF.con.BE %>%
  filter(Year == 2025) %>%
  pull(PPP)

# Extract PPP Value for 2025 for Switzerland
PPP_CH_2025 <- df.IMF.con.CH %>%
  filter(Year == 2025) %>%
  pull(PPP)

# Variable to convert 2025 CHF into 2025 Euro
var.conv.CHF.to.EUR <- PPP_BE_2025/PPP_CH_2025

6 Demographics

Code

# preparae demographics dataset
df.COI.demo.adapted <- df.COI.demo.filtered %>% 
  mutate(
    # ID
    ID = general_id,
    # Calculate Age
    Age = as.numeric(format(Sys.Date(), "%Y")) - as.numeric(demo_ch_t0_birthyear),
    
    # Convert Gender to Factor
    Sex = factor(demo_ch_t0_gender, levels = 0:3, labels = c("Male", "Female", "Other", "I don't know")),
    
    # Convert BMI to Numeric
    BMI = as.numeric(antro_bmi_autocalcu),
    
    # Categorize BMI
    "BMI category" = case_when(
      BMI >= 25 & BMI < 30 ~ "Overweight",
      BMI >= 30 ~ "Obesity",
      TRUE ~ NA_character_
    ) %>% factor(levels = c("Overweight", "Obesity")),

    # Convert Marital Status to Factor
    "Marital status" = factor(demo_ch_t0_marital_status, 
                            levels = 0:5, 
                            labels = c("Single", "Registered civil partnership", "Married", 
                                       "Widowed", "Dissolved civil partnership", "Divorced")),
    
    # Work Status
    Work = factor(demo_ch_t0_work_a___0, 
                  levels = 0:6, 
                  labels = c("I am at school/training/studying", "I am employed", "I am self-employed", 
                             "I am a housewife/househusband", "I am unemployed", 
                             "I am retired or in early retirement", "I am doing something else")),
    
    # Work Details for "something else"
    "Work details" = if_else(demo_ch_t0_work_a___0 == 6, demo_ch_t0_work_b, NA_character_),
    
    # Profession
    Profession = demo_ch_t0_profession,
    
    # Work Percentage
    "Work percentage" = as.numeric(demo_ch_t0_work_percentage),
    
    # Education
    Education = factor(demo_ch_t0_education, 
                       levels = 0:8, 
                       labels = c("I have not completed any school or training", 
                                  "Primary school or primary schools", 
                                  "Recent vocational training", 
                                  "General school of secondary level I", 
                                  "Intermediate vocational training", 
                                  "Higher general secondary school", 
                                  "School for higher vocational training", 
                                  "University", 
                                  "Other education")),
    
    # Additional Education Details
    "Other education" = if_else(demo_ch_t0_education == 9, demo_ch_t0_other_education, NA_character_),
    
    # Sports and Duration
    Sports = demo_ch_t0_sports,
    "Duration of weekly PA (min)" = as.numeric(demo_ch_t0_sport_duration),
    
    # LBP Duration
    "Duration of LBP (months)" = as.numeric(demo_ch_t0_lbp_duration),
    
    # Other Diseases
    "Other diseases" = demo_ch_t0_other_list,
    
    # Diet
    Diet = factor(demo_ch_t0_diet, levels = c(0, 1), labels = c("No", "Yes")),
    
    # Diet Type
    "Diet type" = factor(demo_ch_t0_diet_2, 
                       levels = 1:7, 
                       labels = c("Vegetarian", "Pescotarian", "Pescotarian", "Flexitarian", 
                                  "Vegan", "Paleo", "Other")),
    
    # Supplements
    Supplements = factor(demo_ch_t0_supplements, levels = c(0, 1), labels = c("No", "Yes")),
    "Supplement type" = demo_ch_t0_supplements_2,
    
    # Intolerances
    Intolerances = factor(demo_ch_t0_intolerances, levels = c(0, 1), labels = c("No", "Yes")),
    "Kind of intolerances" = demo_ch_t0_intolerances_2,
    
    # Allergies
    Allergies = factor(demo_ch_t0_allergies, levels = c(0, 1), labels = c("No", "Yes")),
    "Kind of allergy" = demo_ch_t0_allergies_2
  ) %>% 
  # Select only the specified variables
  select(
    ID, Sex, Age, BMI, "BMI category", "Marital status", Work, "Work details", Profession, 
    "Work percentage", Education, "Other education", Sports, "Duration of weekly PA (min)", 
    "Duration of LBP (months)", "Other diseases", Diet, "Diet type", Supplements, "Supplement type", 
    Intolerances, "Kind of intolerances", Allergies, "Kind of allergy"
  )

Check the selected variables for missing values.

Code

DataExplorer::plot_missing(df.COI.demo.adapted, title = "Missing values COI Demographics Switzerland")

Important: Variables “Other_education”, “Diet_type” and “Work_detail” were variables that were only specified if needed. Therefore the missing data do not need to be taken into account. Therefore, there are no missing data in the demographics dataset.

7 EQ5D5L

The quality of life was evaluated using the EQ5D5L via an utility score.

Code

df.EQ5D5L.coding <- rio::import("../coding/EQ5D5L_coding.csv")

Code

df.EQ5D5L.raw <- df.COI.filtered %>%
  select(general_id, starts_with("eq5d5l_")) %>%
  select(-eq5d5l_german_complete) 

save(df.EQ5D5L.raw, file = "../Exported datasets/df.EQ5D5L.raw.rda")

Calculate utilities via 3 different value sets (DE, GB, BE) using the EQ5D package

Code

# Define the function to combine answers and create a 5-digit number
health_state <- function(q1, q2, q3, q4, q5) {
  answers <- c(q1, q2, q3, q4, q5)
  combined_number <- as.numeric(paste(answers, collapse = ""))
  return(combined_number)
}

# Develop health states and utility scores
df.EQ5D5L.adapted <- df.EQ5D5L.raw %>%
  mutate(ID = general_id) %>% 
  rowwise() %>%
  mutate(
    # Calculation of health states
    `Health state` = health_state(eq5d5l_ch_q1, eq5d5l_ch_q2, eq5d5l_ch_q3, eq5d5l_ch_q4, eq5d5l_ch_q5),
    # Subjective health (NAS 0-100)
    `Subjective health` = as.numeric(eq5d5l_ch_q6),
    # Utilities for the different value sets
    `Utility (German value set)` = eq5d(scores = `Health state`, type = "VT", country = "Germany", version = "5L", ignore.invalid = TRUE),
    `Utility (Belgium value set)` = eq5d(scores = `Health state`, type = "VT", country = "Belgium", version = "5L", ignore.invalid = TRUE),
    `Utility (GB value set)` = eq5d(scores = `Health state`, type = "VT", country = "England", version = "5L", ignore.invalid = TRUE),
    `Utility (Canadian value set)`= eq5d(scores = `Health state`, type = "VT", country = "Canada", version = "5L", ignore.invalid = TRUE)
  ) %>%
  ungroup() %>%
  # Select only relevant variables
  select(ID, `Health state`, `Subjective health`, `Utility (German value set)`, 
         `Utility (Belgium value set)`, `Utility (GB value set)`, `Utility (Canadian value set)`) %>% 
  left_join(df.COI.demo.adapted %>% 
              select(ID, Sex, Age, BMI, `BMI category`), by = "ID") %>%
  relocate(Sex, Age, BMI, `BMI category`, .after = ID)

Comparison of the current values to the values in Canada for CLBP patients.

Code

# median values, Canadian value set included due to the comparison to the reference values for CLBP patients based on the study by Poder et al. 
tbl.EQ5D5L.median <- sapply(df.EQ5D5L.adapted[, c("Subjective health", 
                                        "Utility (German value set)", 
                                        "Utility (Belgium value set)", 
                                        "Utility (GB value set)", 
                                        "Utility (Canadian value set)")], 
                  function(x) median(x, na.rm = TRUE))

tbl.EQ5D5L.median

           Subjective health   Utility (German value set) 
                     75.5000                       0.8710 
 Utility (Belgium value set)       Utility (GB value set) 
                      0.8060                       0.8165 
Utility (Canadian value set) 
                      0.8280

Code

tableone::CreateTableOne(data = df.EQ5D5L.adapted, vars = c("Subjective health", "Utility (German value set)", "Utility (Belgium value set)", "Utility (GB value set)", "Utility (Canadian value set)"), includeNA = TRUE)

                                          
                                           Overall      
  n                                           52        
  Subjective health (mean (SD))            73.06 (14.50)
  Utility (German value set) (mean (SD))    0.84 (0.13) 
  Utility (Belgium value set) (mean (SD))   0.78 (0.14) 
  Utility (GB value set) (mean (SD))        0.81 (0.13) 
  Utility (Canadian value set) (mean (SD))  0.81 (0.12)

Code

# EQ5D5L long format for utility scores
df.EQ5D5L.utility.long <- df.EQ5D5L.adapted %>%
  pivot_longer(
    cols = c("Utility (German value set)", "Utility (Belgium value set)", "Utility (GB value set)"),
    names_to = "Variable",
    values_to = "Value"
  )

# Plot for EQ5D5L values utility scores
plot.eq5d5l.utility <- ggplot(df.EQ5D5L.utility.long, aes(x = Variable, y = Value)) +
  geom_boxplot() +
  labs(title = "Boxplot of EQ5D5L Values",
       x = "Different value sets used to calculate utilities",
       y = "Utility") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 1)

plot.eq5d5l.utility

Code

ggsave(
  filename = "../figures/fig.EQ5D5L.utility.different.value.sets.png",
  plot = plot.eq5d5l.utility,
  width = 8,
  height = 6,
  dpi = 300)

Code

df.EQ5D5L.subj.health.long <- df.EQ5D5L.adapted %>%
  pivot_longer(
    cols = c("Subjective health"),
    names_to = "Variable",
    values_to = "Value"
  )

# Plot for EQ5D5L values utility scores
plot.eq5d5l.subj.health <- ggplot(df.EQ5D5L.subj.health.long, aes(x = Variable, y = Value)) +
  geom_boxplot() +
  labs(title = "Boxplot of subjective health status",
       y = "Numeric rating scale from 0-100") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0, 100)

plot.eq5d5l.subj.health

Code

ggsave(
  filename = "../figures/fig.EQ5D5L.subj.health.png",
  plot = plot.eq5d5l.subj.health,
  width = 8,
  height = 6,
  dpi = 300)

8 Descriptive data for demographics

Code

# Develop a table 1 to describe the demographics of the BO2WL trial- participants.
# Define variables of interest
vars.demographics.COI <- c("Sex", "Age", "BMI", "Marital status", 
                           "Work percentage", "Education", 
                           "Duration of weekly PA (min)", 
                           "Duration of LBP (months)")
df.COI.demo.adapted.2 <- df.COI.demo.adapted %>%
  mutate(`BMI category` = factor(`BMI category`, 
                                 levels = c("Overweight", "Obese"))) %>%
  mutate(across(where(is.factor), droplevels)) # Entfernt ungenutzte Faktorlevel

# Drop unused factor levels
df.COI.demo.adapted.2 <- df.COI.demo.adapted %>%
  mutate(across(where(is.factor), droplevels)) %>% 
  left_join(df.EQ5D5L.adapted %>%
              select(ID, `Subjective health`, `Utility (German value set)`) %>%
              rename(
                `Subjective health (NAS 0-100)` = `Subjective health`,
                `Utility (German value set)` = `Utility (German value set)`
              ))

Code

# Export the dataset
write.csv(df.COI.demo.adapted.2, "../exported_datasets/df.COI.demo.adapted.2.csv", row.names= TRUE)

Code

# Format labels for table1
with(df.COI.demo.adapted.2, {
  label(Sex) <- "Sex"
  label(Age) <- "Age (years)"
  label(BMI) <- "Body Mass Index"
  label(`Marital status`) <- "Marital Status"
  label(`Work percentage`) <- "Work Percentage"
  label(Education) <- "Education level"
  label(`Duration of weekly PA (min)`) <- "Weekly Physical Activity (minutes)"
  label(`Duration of LBP (months)`) <- "Duration of Low Back Pain (months)"
  label(`Subjective health (NAS 0-100)`) <- "Subjective health (NAS 0-100)"
  label(`Utility (German value set)`) <- "Utility (German value set)"
})

# Create summary table stratified by BMI category
tbl.demographics <- table1(~Sex + Age + BMI + `Marital status` + `Work percentage` + 
         Education  + `Duration of weekly PA (min)` + `Duration of LBP (months)` 
         + `Subjective health (NAS 0-100)` + `Utility (German value set)`| `BMI category`, 
       data = df.COI.demo.adapted.2, 
       overall = "Overall", 
       droplevels = FALSE)

tbl.demographics

	Overweight (N=18)	Obesity (N=35)	Overall (N=53)
Sex
Male	7 (38.9%)	12 (34.3%)	19 (35.8%)
Female	11 (61.1%)	23 (65.7%)	34 (64.2%)
Age
Mean (SD)	48.6 (11.6)	48.8 (9.41)	48.8 (10.1)
Median [Min, Max]	50.0 [30.0, 66.0]	50.0 [24.0, 65.0]	50.0 [24.0, 66.0]
BMI
Mean (SD)	28.0 (1.19)	32.9 (2.60)	31.2 (3.22)
Median [Min, Max]	27.9 [25.4, 29.8]	32.1 [30.0, 39.7]	30.8 [25.4, 39.7]
Marital status
Single	4 (22.2%)	12 (34.3%)	16 (30.2%)
Married	13 (72.2%)	18 (51.4%)	31 (58.5%)
Dissolved civil partnership	0 (0%)	1 (2.9%)	1 (1.9%)
Divorced	1 (5.6%)	4 (11.4%)	5 (9.4%)
Work percentage
Mean (SD)	69.4 (37.2)	79.8 (25.3)	76.3 (29.9)
Median [Min, Max]	80.0 [0, 100]	90.0 [0, 100]	90.0 [0, 100]
Education
Primary school or primary schools	0 (0%)	2 (5.7%)	2 (3.8%)
Recent vocational training	1 (5.6%)	0 (0%)	1 (1.9%)
General school of secondary level I	0 (0%)	1 (2.9%)	1 (1.9%)
Intermediate vocational training	4 (22.2%)	6 (17.1%)	10 (18.9%)
Higher general secondary school	2 (11.1%)	1 (2.9%)	3 (5.7%)
School for higher vocational training	6 (33.3%)	16 (45.7%)	22 (41.5%)
University	5 (27.8%)	9 (25.7%)	14 (26.4%)
Duration of weekly PA (min)
Mean (SD)	173 (153)	109 (101)	131 (124)
Median [Min, Max]	120 [0, 435]	90.0 [0, 460]	100 [0, 460]
Duration of LBP (months)
Mean (SD)	72.1 (67.6)	68.6 (76.6)	69.8 (73.0)
Median [Min, Max]	48.0 [6.00, 240]	36.0 [3.00, 324]	36.0 [3.00, 324]
Subjective health (NAS 0-100)
Mean (SD)	74.9 (15.1)	72.1 (14.3)	73.1 (14.5)
Median [Min, Max]	80.0 [40.0, 95.0]	75.0 [30.0, 90.0]	75.5 [30.0, 95.0]
Missing	1 (5.6%)	0 (0%)	1 (1.9%)
Utility (German value set)
Mean (SD)	0.876 (0.0639)	0.822 (0.153)	0.840 (0.133)
Median [Min, Max]	0.881 [0.770, 0.943]	0.865 [0.290, 1.00]	0.871 [0.290, 1.00]
Missing	1 (5.6%)	0 (0%)	1 (1.9%)

Code

write.csv(tbl.demographics, "../tables/tbl.COI.demographics.csv", row.names = TRUE)

9 Supplements and cost calculation

Code

df.COI.supplements <- df.COI.demo.adapted.2 %>% 
  select(ID, Sex, Age, BMI, `BMI category`, Supplements, `Supplement type`)

Convert the dataframe on supplements into a long formated dataframe.

Code

df.COI.supplements.long <- df.COI.supplements %>%
  # Split `Supplement type` into separate supplements
  mutate(Supplement_type_split = str_split(`Supplement type`, ";")) %>%
  unnest_longer(Supplement_type_split) %>% # Unnest into longer format
  # Trim whitespace from split supplement details
  mutate(Supplement_type_split = str_trim(Supplement_type_split)) %>%
  # Separate the details of each supplement into columns
  separate(
    Supplement_type_split,
    into = c("Name", "Company", "Dose", "Frequency", "Days"),
    sep = "/", # Use '/' as the delimiter
    fill = "right", # Fill missing values with NA
    remove = TRUE # Remove the original column after splitting
  ) %>%
  # Convert Frequency and Days to numeric
  mutate(
    Frequency = as.numeric(Frequency),
    Days = as.numeric(Days)
  ) %>%
  # Remove rows where Name, Company, or Dose are missing
  filter(!is.na(Name) & Name != "",
         !is.na(Company) & Company != "",
         !is.na(Dose) & Dose != "") %>%
  # Select only the desired columns
  select(ID, Sex, Age, BMI, `BMI category`, Name, Company, Dose, Frequency, Days)

Code

write.csv(df.COI.supplements.long, "../exported_datasets/df.COI.supplements.long.csv", row.names = TRUE)

Code

df.ref.supplements.raw <- read_excel("../reference data/Supplements_Prices.xlsx")

This merge should be done by ID, Name and Company. The coloumns on package size, price year and prices should be added.

Code

df.COI.supplements.enriched <- df.ref.supplements.raw %>%
  left_join(df.COI.supplements.long %>%
              select(ID, Sex, Age, BMI, `BMI category`) %>%
              distinct(ID, .keep_all = TRUE),  # Behalte pro ID nur eine Zeile
            by = "ID") %>%
  relocate(Sex, Age, BMI, `BMI category`, .after = ID)

Code

df.COI.supplements.enriched <- df.COI.supplements.enriched %>%
  mutate(
    `Price_1 Suppl_used` = round((`Price_1 (CHF)` / `Package size`) * Frequency * Days, 2),
    `Price_2 Suppl_used` = round((`Price_2 (CHF)` / `Package size`) * Frequency * Days, 2),
    `Price_3 Suppl_used` = round((`Price_3 (CHF)` / `Package size`) * Frequency * Days, 2),
    `Price (CHF)` = round(rowMeans(across(c(`Price_1 Suppl_used`, `Price_2 Suppl_used`, `Price_3 Suppl_used`))), 2)
  )

9.1 Recalculation of costs for supplements

Code

# Convert CHF from reference year to 2025 CHF - variable "Price (2025 CHF)"
df.COI.supplements.enriched <- df.COI.supplements.enriched %>%
  mutate(
    `Price_1 Suppl_used (2025 CHF)` = round(convert_price_to_2025(as.numeric(`Price_1 Suppl_used`), `Price year`), 2),
    `Price_2 Suppl_used (2025 CHF)` = round(convert_price_to_2025(as.numeric(`Price_2 Suppl_used`), `Price year`), 2),
    `Price_3 Suppl_used (2025 CHF)` = round(convert_price_to_2025(as.numeric(`Price_3 Suppl_used`), `Price year`), 2),
    `Price (2025 CHF)` = round(convert_price_to_2025(as.numeric(`Price (CHF)`), `Price year`), 2))

Code

df.COI.supplements.enriched <- df.COI.supplements.enriched %>%
  mutate(`Price_1 Suppl_used (2025 EUR)` = round(`Price_1 Suppl_used (2025 CHF)` * var.conv.CHF.to.EUR, 2),
         `Price_2 Suppl_used (2025 EUR)` = round(`Price_2 Suppl_used (2025 CHF)` * var.conv.CHF.to.EUR, 2),
         `Price_3 Suppl_used (2025 EUR)` = round(`Price_3 Suppl_used (2025 CHF)` * var.conv.CHF.to.EUR, 2),
         `Price (2025 EUR)`= round(`Price (2025 CHF)`* var.conv.CHF.to.EUR,2))

Code

df.COI.supplements.enriched <- df.COI.supplements.enriched %>%
  group_by(ID) %>%
  mutate(
    `Total suppl costs (2025 CHF)` = ifelse(row_number() == 1, round(sum(`Price (2025 CHF)`, na.rm = TRUE), 2), NA),
    `Total suppl costs (2025 EUR)` = ifelse(row_number() == 1, round(sum(`Price (2025 EUR)`, na.rm = TRUE), 2), NA)
  ) %>%
  ungroup()

Code

write.csv(df.COI.supplements.enriched, "../exported_datasets/df.COI.supplements.enriched.csv", row.names = TRUE)

10 iMCQ data

Now we want to analyse the data on the three questionnaires iMCQ, iPCQ and EQ-5D5L. We start with analysing the data on the iMCQ questionnaire. This questionnaire evaluates the health care use (e.g. GP or specialists consultations, therapy sessions with e.g. physiotherapist, medication use).

The dataset (df.iMCQ.coding) shows the coding of the iMCQ questions.

Code

df.iMCQ.coding <- rio::import("../coding/imcq_ch_coding.csv")

We first need to import and prepare the variables of the iMCQ dataset.

Code

df.iMCQ.raw <- df.COI.filtered %>%
  select(general_id, general_t0_date, starts_with("imcq"))

Code

df.iMCQ.adapted <- df.iMCQ.raw %>% 
  mutate(
    # ID
    ID = general_id,
    # T0 year
    "Year T0" = lubridate::year(general_t0_date),
    # consultation of healthcare professional
    "HC professional" = factor(imcq_ch_q1, levels = 0:1, labels = c("No", "Yes")),
    "GP or Medical assistant" = factor(imcq_ch_q1a, levels = 0:1, labels = c("No", "Yes")),
    "GP" = as.numeric(imcq_ch_q1b1, na.rm = TRUE),
    "Medical assistant" = as.numeric(imcq_ch_q1_b2, na.rm = TRUE),
    "Social worker" = as.numeric(imcq_ch_q2, na.rm = TRUE),
    "Physical therapist" = as.numeric(imcq_ch_q3, na.rm = TRUE),
    "Occupational therapist" = as.numeric(imcq_ch_q4, na.rm = TRUE),
    "Logopaedist" = as.numeric(imcq_ch_q5, na.rm = TRUE),
    "Nutritionist" = as.numeric(imcq_ch_q6, na.rm = TRUE),
    "Complementary medicine" = as.numeric(imcq_ch_q7, na.rm = TRUE),
    "Psychological care" = as.numeric(imcq_ch_q8, na.rm = TRUE),
    "Company doctor" = as.numeric(imcq_ch_q9, na.rm = TRUE),
    "Care" = factor(imcq_ch_q10a, levels = 0:1, labels = c("No", "Yes")),
    "Care kind" = factor(imcq_ch_q10b, 
                         levels = 0:2, 
                         labels = c("Domestic care", "Personal care", "Nursing care")),
    "Domestic care (weeks)" = as.numeric(imcq_ch_q10c, na.rm = TRUE),
    "Personal care (weeks)" = as.numeric(imcq_ch_q10c_2, na.rm = TRUE),
    "Nursing care (weeks)" = as.numeric(imcq_ch_q10c_3, na.rm = TRUE),
    "Domestic care (hours/week)" = as.numeric(imcq_ch_q10d_1, na.rm = TRUE),
    "Personal care (hours/week)" = as.numeric(imcq_ch_q10d2, na.rm = TRUE),
    "Nursing care (hours/week)" = as.numeric(imcq_ch_q10d_3, na.rm = TRUE),
    "Medication" = factor(imcq_ch_q11a, levels = 0:1, labels = c("No", "Yes")),
    "Medication kind" = imcq_ch_q11b,
    "Hospital" = factor(imcq_ch_q12a, levels = 0:1, labels = c("No", "Yes")),
    "Hospital (days)" = as.numeric(imcq_ch_q12b, na.rm = TRUE),
    "Emergency department" = as.numeric(imcq_ch_q12b, na.rm = TRUE),
    "Ambulance" = as.numeric(imcq_ch_q13, na.rm = TRUE),
    "Outpatient clinic" = factor(imcq_ch_q14a, levels = 0:1, labels = c("No", "Yes")),
    "Outpatient clinic kind" = imcq_ch_q14b,
    "Day clinic" = factor(imcq_ch_q15a, levels = 0:1, labels = c("No", "Yes")),
    "Day clinic kind" = imcq_ch_q15b,
    "Day clinic (days)" = as.numeric(imcq_ch_q15c, na.rm = TRUE),
    "Day care" = factor(imcq_ch_q16a, levels = 0:1, labels = c("No", "Yes")),
    "Day care kind" = case_when(
      imcq_ch_q16b1___1 == 1 ~ "Residential home",
      imcq_ch_q16b1___2 == 1 ~ "Rehabilitation centre",
      imcq_ch_q16b1___3 == 1 ~ "Psychiatric facility",
      imcq_ch_q16b1___4 == 1 ~ imcq_ch_q16b2  # Use the value from imcq_ch_q16b2 if condition is met
    ),
    "Day care Residential home (days)" = as.numeric(imcq_ch_q16c1, na.rm = TRUE),
    "Day care Rehabilitation centre (days)" = as.numeric(imcq_ch_q16c2, na.rm = TRUE),
    "Day care Psychiatric facility (days)" = as.numeric(imcq_ch_q16c3, na.rm = TRUE),
    "Day care Other (days)" = as.numeric(imcq_ch_q16c4, na.rm = TRUE),
    "Hospital stays" = factor(imcq_ch_q17a, levels = 0:1, labels = c("No", "Yes")),
    "Hospital stays (days)" = as.numeric(imcq_ch_q17b, na.rm = TRUE),
    "Hospital stays (days)" = as.numeric(imcq_ch_q17c, na.rm = TRUE),
    "Other inpatient stays" = factor(imcq_ch_q18a, levels = 0:1, labels = c("No", "Yes")),
    "Other inpatient stays kind" = case_when(
       imcq_ch_q18b___0 == 1 ~ "Residential home",
      imcq_ch_q18b___1 == 1 ~ "Rehabilitation centre",
      imcq_ch_q18b___2 == 1 ~ "Psychiatric facility",
      imcq_ch_q18b___3 == 1 ~ imcq_ch_q18b_1),
    "Inpatient Residential home (days)" = as.numeric(imcq_ch_q18c_1, na.rm = TRUE),
    "Inpatient Rehabilitation centre (days)" = as.numeric(imcq_ch_q18c_2, na.rm = TRUE),
    "Inpatient Psychiatric facility (days)" = as.numeric(imcq_ch_q18c_3, na.rm = TRUE),
    "Inpatient Other (days)" = as.numeric(imcq_ch_q18c_4, na.rm = TRUE),
    "Family help" = factor(imcq_ch_q19a, levels = 0:1, labels = c("No", "Yes")),
    "Family help kind" = case_when(
      imcq_ch_q19b___0 == 1 ~ "Domestic care",
      imcq_ch_q19b___1 == 1 ~ "Personal care",
      imcq_ch_q19b___2 == 1 ~ "Practical help"),
    "Family domestic (weeks)" = as.numeric(imcq_ch_q19c_1, na.rm = TRUE),
    "Family personal (weeks)" = as.numeric(imcq_ch_q19c_2, na.rm = TRUE),
    "Family practical (weeks)" = as.numeric(imcq_ch_q19c_3, na.rm = TRUE),
    "Family domestic (hours)" = as.numeric(imcq_ch_q19d_1, na.rm = TRUE),
    "Family personal (hours)" = as.numeric(imcq_ch_q19d_2, na.rm = TRUE),
    "Family practical (hours)" = as.numeric(imcq_ch_q19d_3, na.rm = TRUE),
    "Means transport hospital" = case_when(
      imcq_ch_q20a == 0 ~ "Not applicable",
      imcq_ch_q20a == 1 ~ "Pedestrian",
      imcq_ch_q20a == 2 ~ "Bicycle",
      imcq_ch_q20a == 3 ~ "Car",
      imcq_ch_q20a == 4 ~ "Public transport",
      imcq_ch_q20a == 5 ~ "Taxi",
      imcq_ch_q20a == 6 ~ "Other"),  # Use the value from imcq_ch_q20b for "Other"
    "Distance from home to hospital" = as.numeric(imcq_ch_q20b, na.rm = TRUE)
    ) %>% 
  select(
    ID, 
    `Year T0`,
    `HC professional`, 
    `GP or Medical assistant`, 
    GP, 
    `Medical assistant`, 
    `Social worker`, 
    `Physical therapist`, 
    `Occupational therapist`, 
    Logopaedist, 
    Nutritionist, 
    `Complementary medicine`, 
    `Psychological care`, 
    `Company doctor`, 
    Care, 
    `Care kind`, 
    `Domestic care (weeks)`, 
    `Personal care (weeks)`, 
    `Nursing care (weeks)`, 
    `Domestic care (hours/week)`, 
    `Personal care (hours/week)`, 
    `Nursing care (hours/week)`, 
    Medication, 
    `Medication kind`, 
    Hospital, 
    `Hospital (days)`, 
    `Emergency department`, 
    Ambulance, 
    `Outpatient clinic`, 
    `Outpatient clinic kind`, 
    `Day clinic`, 
    `Day clinic kind`, 
    `Day clinic (days)`, 
    `Day care`, 
    `Day care kind`, 
    `Day care Residential home (days)`, 
    `Day care Rehabilitation centre (days)`, 
    `Day care Psychiatric facility (days)`, 
    `Day care Other (days)`, 
    `Hospital stays`, 
    `Hospital stays (days)`, 
    `Other inpatient stays`, 
    `Other inpatient stays kind`, 
    `Inpatient Residential home (days)`, 
    `Inpatient Rehabilitation centre (days)`, 
    `Inpatient Psychiatric facility (days)`, 
    `Inpatient Other (days)`, 
    `Family help`, 
    `Family help kind`, 
    `Family domestic (weeks)`, 
    `Family personal (weeks)`, 
    `Family practical (weeks)`, 
    `Family domestic (hours)`, 
    `Family personal (hours)`, 
    `Family practical (hours)`, 
    `Means transport hospital`, 
    `Distance from home to hospital`
  ) %>% 
  mutate(
     `GP or Medical assistant` = replace_na(`GP or Medical assistant`, "No"),
    across(GP:`Company doctor`, ~ replace_na(.x, 0)),
    across(`Domestic care (weeks)`:`Nursing care (hours/week)`, ~ replace_na(.x, 0)),
    across(Hospital:`Ambulance`, ~ replace_na(.x, 0)),
    `Outpatient clinic` = replace_na(`Outpatient clinic`, "No"),
    `Day clinic` = replace_na(`Day clinic`, "No"),
    `Day clinic (days)` = replace_na(`Day clinic (days)`, 0),
    across(`Day care Residential home (days)`: `Day care Other (days)`, ~ replace_na(.x, 0)),
    across(`Hospital stays (days)`: `Hospital stays (days)`, ~ replace_na(.x, 0)),
    across(`Inpatient Residential home (days)`: `Inpatient Other (days)`, ~ replace_na(.x, 0)),
    across(`Family domestic (weeks)`: `Family practical (hours)`, ~ replace_na(.x, 0))
  ) # replace all values with NA with 0 or "No" for clarification

A separate dataset for the medication is developed in the long format to analyse medication use and further analyse the medication data reported in the iMCQ questionnaire.

Code

# New dataset for medication 
df.iMCQ.medication.long <- df.iMCQ.adapted %>%
  # Split `Medication kind` into separate medications
  mutate(Medication_kind_split = str_split(`Medication kind`, ";")) %>% # Use backticks for column name
  unnest_longer(Medication_kind_split) %>% # Unnest into longer format
  # Trim whitespace from split medication details
  mutate(Medication_kind_split = str_trim(Medication_kind_split)) %>%
  # Separate the details of each medication into columns
  separate(
    Medication_kind_split,
    into = c("Name", "Company", "Dose", "Frequency", "Days", "Swissmedic"),
    sep = "/", # Use '/' as the delimiter
    fill = "right", # Fill missing values with NA
    remove = TRUE # Remove the original column after splitting
  ) %>%
  # Convert Frequency and Days to numeric
  mutate(
    Frequency = as.numeric(Frequency),
    Days = as.numeric(Days)
  ) %>%
  # Select only the desired columns
  select(ID, `Year T0`, Name, Company, Dose, Frequency, Days, Swissmedic)

We also exclude the variable “Medication kind” in the df.iMCQ.adapted dataset. The relevant dataset for analysis of the iMCQ data is now “df.iMCQ.adapted.2”.

Code

df.iMCQ.adapted.2 <- df.iMCQ.adapted %>%
  # Remove the column `Medication kind`
  select(-`Medication kind`)

Check the selected variables for missing values.

Code

# Evaluate missing data in the iMCQ.adapted.2 dataset
DataExplorer::plot_missing(df.iMCQ.adapted.2, title = "Missing values iMCQ Switzerland")

Code

# Evaluate missing data in the medication dataset
DataExplorer::plot_missing(df.iMCQ.medication.long, title = "Missing values Medication")

Similar to the demographics dataset, the variables with missing values are those that were only specified if needed.

Code

for (var in names(df.iMCQ.adapted)) {
  if (is.numeric(df.iMCQ.adapted[[var]])) {
    barplot(table(df.iMCQ.adapted[[var]]), main = paste("", var))
  }
}

Not a normal distribution for all values.

A table to report healthcare usage is developed.

Code

# Develop a summary table for the iMCQ outcomes and combine the dataset on iMCQ with individual specific data from the demographics dataset (df.COI.demo.adaoted.2). The new dataset is now called df.iMCQ.adapted.2.
df.iMCQ.adapted.3 <- df.iMCQ.adapted.2 %>%
  left_join(df.COI.demo.adapted.2 %>%
              select(ID, Sex, Age, BMI, `BMI category`, `Duration of LBP (months)`), by = "ID") %>% 
  filter(`HC professional` == "Yes")

summary(df.iMCQ.adapted.3)

      ID               Year T0     HC professional GP or Medical assistant
 Length:34          Min.   :2023   No : 0          No : 1                 
 Class :character   1st Qu.:2023   Yes:34          Yes:33                 
 Mode  :character   Median :2024                                          
                    Mean   :2024                                          
                    3rd Qu.:2024                                          
                    Max.   :2025                                          
                                                                          
       GP        Medical assistant Social worker     Physical therapist
 Min.   :0.000   Min.   : 0.000    Min.   :0.00000   Min.   : 0.000    
 1st Qu.:1.000   1st Qu.: 0.000    1st Qu.:0.00000   1st Qu.: 0.000    
 Median :2.000   Median : 0.000    Median :0.00000   Median : 0.000    
 Mean   :1.588   Mean   : 1.324    Mean   :0.05882   Mean   : 2.235    
 3rd Qu.:2.000   3rd Qu.: 2.000    3rd Qu.:0.00000   3rd Qu.: 4.000    
 Max.   :4.000   Max.   :12.000    Max.   :2.00000   Max.   :12.000    
                                                                       
 Occupational therapist  Logopaedist  Nutritionist    Complementary medicine
 Min.   :0.0000         Min.   :0    Min.   :0.0000   Min.   : 0.000        
 1st Qu.:0.0000         1st Qu.:0    1st Qu.:0.0000   1st Qu.: 0.000        
 Median :0.0000         Median :0    Median :0.0000   Median : 0.000        
 Mean   :0.2647         Mean   :0    Mean   :0.1176   Mean   : 1.176        
 3rd Qu.:0.0000         3rd Qu.:0    3rd Qu.:0.0000   3rd Qu.: 0.000        
 Max.   :6.0000         Max.   :0    Max.   :2.0000   Max.   :15.000        
                                                                            
 Psychological care Company doctor     Care            Care kind 
 Min.   :0.0000     Min.   :0.00000   No :33   Domestic care: 1  
 1st Qu.:0.0000     1st Qu.:0.00000   Yes: 1   Personal care: 0  
 Median :0.0000     Median :0.00000            Nursing care : 0  
 Mean   :0.1176     Mean   :0.08824            NA's         :33  
 3rd Qu.:0.0000     3rd Qu.:0.00000                              
 Max.   :2.0000     Max.   :2.00000                              
                                                                 
 Domestic care (weeks) Personal care (weeks) Nursing care (weeks)
 Min.   : 0.0000       Min.   :0             Min.   :0           
 1st Qu.: 0.0000       1st Qu.:0             1st Qu.:0           
 Median : 0.0000       Median :0             Median :0           
 Mean   : 0.3529       Mean   :0             Mean   :0           
 3rd Qu.: 0.0000       3rd Qu.:0             3rd Qu.:0           
 Max.   :12.0000       Max.   :0             Max.   :0           
                                                                 
 Domestic care (hours/week) Personal care (hours/week)
 Min.   :0.00000            Min.   :0                 
 1st Qu.:0.00000            1st Qu.:0                 
 Median :0.00000            Median :0                 
 Mean   :0.08824            Mean   :0                 
 3rd Qu.:0.00000            3rd Qu.:0                 
 Max.   :3.00000            Max.   :0                 
                                                      
 Nursing care (hours/week) Medication Hospital Hospital (days)
 Min.   :0                 No : 4     No :31   Min.   :0      
 1st Qu.:0                 Yes:30     Yes: 3   1st Qu.:0      
 Median :0                                     Median :0      
 Mean   :0                                     Mean   :0      
 3rd Qu.:0                                     3rd Qu.:0      
 Max.   :0                                     Max.   :0      
                                                              
 Emergency department   Ambulance Outpatient clinic Outpatient clinic kind
 Min.   :0            Min.   :0   No :31            Length:34             
 1st Qu.:0            1st Qu.:0   Yes: 3            Class :character      
 Median :0            Median :0                     Mode  :character      
 Mean   :0            Mean   :0                                           
 3rd Qu.:0            3rd Qu.:0                                           
 Max.   :0            Max.   :0                                           
                                                                          
 Day clinic Day clinic kind Day clinic (days) Day care Day care kind     
 No :34     Mode:logical    Min.   :0         No :34   Length:34         
 Yes: 0     NA's:34         1st Qu.:0         Yes: 0   Class :character  
                            Median :0                  Mode  :character  
                            Mean   :0                                    
                            3rd Qu.:0                                    
                            Max.   :0                                    
                                                                         
 Day care Residential home (days) Day care Rehabilitation centre (days)
 Min.   :0                        Min.   :0                            
 1st Qu.:0                        1st Qu.:0                            
 Median :0                        Median :0                            
 Mean   :0                        Mean   :0                            
 3rd Qu.:0                        3rd Qu.:0                            
 Max.   :0                        Max.   :0                            
                                                                       
 Day care Psychiatric facility (days) Day care Other (days) Hospital stays
 Min.   :0                            Min.   :0             No :34        
 1st Qu.:0                            1st Qu.:0             Yes: 0        
 Median :0                            Median :0                           
 Mean   :0                            Mean   :0                           
 3rd Qu.:0                            3rd Qu.:0                           
 Max.   :0                            Max.   :0                           
                                                                          
 Hospital stays (days) Other inpatient stays Other inpatient stays kind
 Min.   :0             No :34                Length:34                 
 1st Qu.:0             Yes: 0                Class :character          
 Median :0                                   Mode  :character          
 Mean   :0                                                             
 3rd Qu.:0                                                             
 Max.   :0                                                             
                                                                       
 Inpatient Residential home (days) Inpatient Rehabilitation centre (days)
 Min.   :0                         Min.   :0                             
 1st Qu.:0                         1st Qu.:0                             
 Median :0                         Median :0                             
 Mean   :0                         Mean   :0                             
 3rd Qu.:0                         3rd Qu.:0                             
 Max.   :0                         Max.   :0                             
                                                                         
 Inpatient Psychiatric facility (days) Inpatient Other (days) Family help
 Min.   :0                             Min.   :0              No :33     
 1st Qu.:0                             1st Qu.:0              Yes: 1     
 Median :0                             Median :0                         
 Mean   :0                             Mean   :0                         
 3rd Qu.:0                             3rd Qu.:0                         
 Max.   :0                             Max.   :0                         
                                                                         
 Family help kind   Family domestic (weeks) Family personal (weeks)
 Length:34          Min.   : 0.0000         Min.   :0              
 Class :character   1st Qu.: 0.0000         1st Qu.:0              
 Mode  :character   Median : 0.0000         Median :0              
                    Mean   : 0.3824         Mean   :0              
                    3rd Qu.: 0.0000         3rd Qu.:0              
                    Max.   :13.0000         Max.   :0              
                                                                   
 Family practical (weeks) Family domestic (hours) Family personal (hours)
 Min.   :0                Min.   :0.00000         Min.   :0              
 1st Qu.:0                1st Qu.:0.00000         1st Qu.:0              
 Median :0                Median :0.00000         Median :0              
 Mean   :0                Mean   :0.05882         Mean   :0              
 3rd Qu.:0                3rd Qu.:0.00000         3rd Qu.:0              
 Max.   :0                Max.   :2.00000         Max.   :0              
                                                                         
 Family practical (hours) Means transport hospital
 Min.   :0                Length:34               
 1st Qu.:0                Class :character        
 Median :0                Mode  :character        
 Mean   :0                                        
 3rd Qu.:0                                        
 Max.   :0                                        
                                                  
 Distance from home to hospital     Sex          Age             BMI       
 Min.   : 1.000                 Male  :12   Min.   :24.00   Min.   :26.50  
 1st Qu.: 3.125                 Female:22   1st Qu.:39.75   1st Qu.:29.57  
 Median : 5.250                             Median :50.00   Median :30.95  
 Mean   : 7.417                             Mean   :48.03   Mean   :31.12  
 3rd Qu.:13.000                             3rd Qu.:56.00   3rd Qu.:32.58  
 Max.   :15.000                             Max.   :65.00   Max.   :39.70  
 NA's   :28                                                                
     BMI category Duration of LBP (months)
 Overweight:10    Min.   :  3.00          
 Obesity   :24    1st Qu.: 24.00          
                  Median : 38.00          
                  Mean   : 81.59          
                  3rd Qu.:120.00          
                  Max.   :324.00

Code

write.csv(df.iMCQ.adapted.2, "../exported_datasets/df.iMCQ.adapted.2.csv", row.names = TRUE)

11 iMCQ data valuation

As a second step, the healthcare usage needs to be monetised.

This process is conducted in two steps:

Valuation of medication
Valuation of the healthcare consultations excluding medication

11.1 Medication valuation

Medication is monetised in different ways:

Medication which is reported in the specialities list is reported via the reported prices of the SL list.
Medication which is not included in the specialities list is monetised through the reported costs from three different web-based pharmacies.

Importing reference datasets

Code

df.ref.package.size <- read_excel("../reference data/Medication_package_size.xlsx")

Code

df.medication.enriched <- df.medication.enriched %>%
  left_join(
    df.ref.package.size %>%
      mutate(Swissmedic = as.character(Swissmedic)) %>%
      select(Swissmedic, `Package size`),
    by = "Swissmedic")

df.medication.enriched <- df.medication.enriched %>%
  left_join(
    df.ref.package.size %>%
      mutate(Swissmedic = as.character(Swissmedic)) %>%
      select(Swissmedic, `Package size`),
    by = "Swissmedic"
  ) %>%
  mutate(`Package size` = ifelse(is.na(`Package size.y`), `Package size.x`, `Package size.y`)) %>%
  select(-`Package size.x`, -`Package size.y`) # Überflüssige Spalten entfernen

df.medication.enriched <- df.medication.enriched %>% 
  mutate(`Package size` = as.numeric(`Package size`))

Code

df.medication.enriched$`Price for  medication used (CHF)` <- 
  round((df.medication.enriched$`Price (CHF)` / df.medication.enriched$`Package size`) * df.medication.enriched$Frequency * df.medication.enriched$Days,2)

Price (2025 CHF) is the price that one participant paid for the medication that he/she used during the last three months.

Code

# Convert CHF from reference year to 2025 CHF - variable "Price (2025 CHF)"
df.medication.enriched <- df.medication.enriched %>%
  rowwise() %>%
  mutate(`Price (2025 CHF)` = round(convert_price_to_2025(`Price for  medication used (CHF)`, `Price year`), 2)) %>%
  ungroup()

Code

df.medication.enriched <- df.medication.enriched %>%
  mutate(`Price (2025 EUR)` = round(`Price (2025 CHF)` * var.conv.CHF.to.EUR, 2))

Code

df.medication.enriched <- df.medication.enriched %>%
  group_by(ID) %>%
  mutate(
    `Total med costs (2025 CHF)` = ifelse(row_number() == 1, round(sum(`Price (2025 CHF)`, na.rm = TRUE), 2), NA),
    `Total med costs (2025 EUR)` = ifelse(row_number() == 1, round(sum(`Price (2025 EUR)`, na.rm = TRUE), 2), NA)
  ) %>%
  ungroup()

Code

# Export the data frame to a CSV file
write.csv(df.medication.enriched, "../exported_datasets/df.COI.medication.enriched.csv", row.names = TRUE)

11.2 General iMCQ data valuation

Code

df.ref.iMCQ.valuation <- read_excel("../reference data/iMCQ_references_Switzerland.xlsx")

df.ref.iMCQ.valuation <- df.ref.iMCQ.valuation %>% 
  mutate(`Price (2025 CHF)` = round(`Price (2025 CHF)`, 2))

Code

# new dataset for monetisation of iMCQ data
df.iMCQ.enriched <- df.iMCQ.adapted.2 %>% 
  select(ID: `Distance from home to hospital`)

df.iMCQ.enriched <- df.iMCQ.enriched %>% 
    left_join(
    df.COI.demo.adapted.2 %>%
      select(ID, `BMI category`), 
    by = "ID"
  )  %>%
  relocate(`BMI category`, .after = ID)
  

# Lookup table for HC prices
df.iMCQ.HC.prices <- df.ref.iMCQ.valuation %>%
  select(`HC professional`, `Price year`, `Price (2025 CHF)`)

# Function for cost calculation
fct.iMCQ.HC.pricing <- function(hc_professional) {
  df.iMCQ.HC.prices %>%
    filter(`HC professional` == hc_professional) %>%
    pull(`Price (2025 CHF)`) %>%
    first()
  }

# Calculation of price for HC visits
df.iMCQ.enriched <- df.iMCQ.enriched %>%
  mutate(
    # GP
    `Costs GP (2025 CHF)` = GP * fct.iMCQ.HC.pricing("GP"),
    # Medical assistant
    `Costs Medical assistant (2025 CHF)` = `Medical assistant` * fct.iMCQ.HC.pricing("Medical assistant"),
    # Social worker
    `Costs Social worker (2025 CHF)` = `Social worker` * fct.iMCQ.HC.pricing("Social worker"),
    # PT
    `Costs Physical therapist (2025 CHF)` = case_when(
      `Physical therapist` >= 1 ~ fct.iMCQ.HC.pricing("Physical therapist_1st") +
        (pmax(0, `Physical therapist` - 1) * fct.iMCQ.HC.pricing("Physical therapist_general")),
      TRUE ~ 0
    ),
    # Occupational therapist
    `Costs occupational therapist (2025 CHF)` = `Occupational therapist` * fct.iMCQ.HC.pricing("Occupational therapist"),
    # Logopaedist
    `Costs Logopaedist (2025 CHF)` = case_when(
      `Logopaedist` > 0 ~ (`Logopaedist` * fct.iMCQ.HC.pricing("Logopaedist_general")) + fct.iMCQ.HC.pricing("Logopaedist_add_on_overall"),
      TRUE ~ 0
    ),
    # Nutritionist
    `Costs Nutritionist (2025 CHF)` = case_when(
      `Nutritionist` == 1 ~ fct.iMCQ.HC.pricing("Nutritionist_1st.cons"),
      `Nutritionist` >= 2 & `Nutritionist` <= 6 ~ fct.iMCQ.HC.pricing("Nutritionist_1st.cons") + 
        (`Nutritionist` - 1) * fct.iMCQ.HC.pricing("Nutritionist_2_6th.cons"),
      `Nutritionist` > 6 ~ fct.iMCQ.HC.pricing("Nutritionist_1st.cons") + 
        5 * fct.iMCQ.HC.pricing("Nutritionist_2_6th.cons") + 
        (`Nutritionist` - 6) * fct.iMCQ.HC.pricing("Nutritionist_>7.cons"),
      TRUE ~ 0
    ),
    # Complementary medicine
    `Costs Complementary medicine (2025 CHF)` = `Complementary medicine` * fct.iMCQ.HC.pricing("Complementary medicine"),
    # Psychological care
    `Costs Psychological care (2025 CHF)` = `Psychological care` * fct.iMCQ.HC.pricing("Psychological care"),
    # Company doctor
    `Costs Company doctor (2025 CHF)` = `Company doctor` * fct.iMCQ.HC.pricing("Company doctor"),
  # Care Costs based on Care kind
    `Costs Care (2025 CHF)` = case_when(
      Care == "No" ~ 0,
      Care == "Yes" & `Care kind` == "Domestic care" ~ `Domestic care (weeks)` * `Domestic care (hours/week)` * fct.iMCQ.HC.pricing("Domestic care"),
      Care == "Yes" & `Care kind` == "Personal care" ~ `Personal care (weeks)` * `Personal care (hours/week)` * fct.iMCQ.HC.pricing("Personal care"),
      Care == "Yes" & `Care kind` == "Nursing care" ~ `Nursing care (weeks)` * `Nursing care (hours/week)` * fct.iMCQ.HC.pricing("Nursing care"),
      TRUE ~ 0)) %>%
  rename(`Costs Occupational therapist (2025 CHF)` = `Costs occupational therapist (2025 CHF)`) %>% 
  # Medication
  left_join(
    df.medication.enriched %>%
      group_by(ID) %>%
      slice(1) %>%
      ungroup() %>%
      select(ID, `Med.count`, `Total med costs (2025 CHF)`) %>%
      rename(`Costs Medication (2025 CHF)` = `Total med costs (2025 CHF)`),
    by = "ID") %>%
  mutate(`Costs Medication (2025 CHF)` = replace_na(`Costs Medication (2025 CHF)`, 0)) %>% 
  # Supplements
  left_join(
    df.COI.supplements.enriched %>%
      group_by(ID) %>%
      slice(1) %>%
      ungroup() %>%
      select(ID, `Total suppl costs (2025 CHF)`) %>%
      rename(`Costs Supplements (2025 CHF)` = `Total suppl costs (2025 CHF)`),
    by = "ID") %>% 
  mutate(`Costs Supplements (2025 CHF)`= replace_na(`Costs Supplements (2025 CHF)`, 0))
  # No stationary hospital stays until now --> no monetisation of the following variables necessary: Hospital (days), Emergency, Hospital stays, other inpatient stays, Day clinic, Day care

Code

# For the monetisation of outpatient clinic data - we develop a long dataframe and do the monetisation according to the functio above

df.iMCQ.consultations.long <- df.iMCQ.adapted.2 %>%
  # Filter only rows where outpatient clinic is Yes
  filter(`Outpatient clinic` == "Yes") %>%
  # Split outpatient clinic kind into separate entries
  mutate(outpatient_clinic_kind_split = str_split(`Outpatient clinic kind`, ";")) %>%
  unnest_longer(outpatient_clinic_kind_split) %>%
  # Trim whitespace from split values
  mutate(outpatient_clinic_kind_split = str_trim(outpatient_clinic_kind_split)) %>%
  # Separate the details into columns
  separate(
    outpatient_clinic_kind_split,
    into = c("Name", "Number_of_consultations"),
    sep = "/", # Use '/' as the delimiter
    fill = "right", # Fill missing values with NA
    remove = TRUE # Remove the original column after splitting
  ) %>%
  # Convert Number_of_consultations to numeric
  mutate(Number_of_consultations = as.numeric(Number_of_consultations)) %>%
  # Calculate Costs  (2025 CHF)
  rowwise() %>%
  mutate(`Costs (2025 CHF)` = Number_of_consultations * (df.iMCQ.HC.prices %>%
    filter(`HC professional` == Name) %>%
    pull(`Price (2025 CHF)`) %>%
    first())) %>%
  ungroup() %>%
  # Select only the desired columns
  select(ID, Name, Number_of_consultations, `Costs (2025 CHF)`)

# Calculate the total costs of all consultations per ID
df.iMCQ.consultations.long <- df.iMCQ.consultations.long %>%
  group_by(ID) %>%
  mutate(
    `Costs outpatient clinic (2025 CHF)` = ifelse(row_number() == 1, round(sum(`Costs (2025 CHF)`, na.rm = TRUE), 2), NA),
  ) %>%
  ungroup()

# Now add the variable Costs outpatient clinic (2025 CHF) to the dataframe df.iMCQ.enriched
df.iMCQ.enriched <- df.iMCQ.enriched %>%
  mutate(`Costs outpatient clinic (2025 CHF)` = NA_real_) %>%  # Erstellt die Spalte, falls sie nicht existiert
  left_join(
    df.iMCQ.consultations.long %>%
      group_by(ID) %>%
      slice(1) %>%
      ungroup() %>%
      select(ID, `Costs outpatient clinic (2025 CHF)`),
    by = "ID"
  ) %>%
  mutate(`Costs outpatient clinic (2025 CHF)` = coalesce(`Costs outpatient clinic (2025 CHF).y`, `Costs outpatient clinic (2025 CHF).x`, 0)) %>%
  select(-`Costs outpatient clinic (2025 CHF).x`, -`Costs outpatient clinic (2025 CHF).y`) %>% 
  rename(`Costs Outpatient clinic (2025 CHF)` = `Costs outpatient clinic (2025 CHF)`)

# Not necessary, as no participant consumed any of those services within the last 3 months
#    `Hospital stays`, 
#    `Hospital stays (days)`, 
#    `Other inpatient stays`, 
#    `Other inpatient stays kind`, 
#    `Inpatient Residential home (days)`, 
#    `Inpatient Rehabilitation centre (days)`, 
#    `Inpatient Psychiatric facility (days)`, 
#    `Inpatient Other (days)`

# Develop a new variable called Direct medical costs (2025 CHF) which is the sum of all services consumed + recalculation into 2025 EUR
df.dir.med.costs <- df.iMCQ.enriched %>%
  mutate(
    `Direct medical costs (2025 CHF)` = rowSums(select(., 
      `Costs GP (2025 CHF)`, 
      `Costs Medical assistant (2025 CHF)`, 
      `Costs Social worker (2025 CHF)`, 
      `Costs Physical therapist (2025 CHF)`, 
      `Costs Occupational therapist (2025 CHF)`, 
      `Costs Logopaedist (2025 CHF)`, 
      `Costs Nutritionist (2025 CHF)`, 
      `Costs Complementary medicine (2025 CHF)`, 
      `Costs Psychological care (2025 CHF)`, 
      `Costs Company doctor (2025 CHF)`, 
      `Costs Outpatient clinic (2025 CHF)`, 
      `Costs Care (2025 CHF)`, 
      `Costs Medication (2025 CHF)`, 
      `Costs Supplements (2025 CHF)`), na.rm = TRUE),
    # Convert Costs into 2025 EUR
    `Direct medical costs (2025 EUR)` = round(`Direct medical costs (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs GP (2025 EUR)` = round(`Costs GP (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Medical assistant (2025 EUR)` = round(`Costs Medical assistant (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Social worker (2025 EUR)` = round(`Costs Social worker (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Physical therapist (2025 EUR)` = round(`Costs Physical therapist (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Occupational therapist (2025 EUR)` = round(`Costs Occupational therapist (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Logopaedist (2025 EUR)` = round(`Costs Logopaedist (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Nutritionist (2025 EUR)` = round(`Costs Nutritionist (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Complementary medicine (2025 EUR)` = round(`Costs Complementary medicine (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Psychological care (2025 EUR)` = round(`Costs Psychological care (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Company doctor (2025 EUR)` = round(`Costs Company doctor (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Outpatient clinic (2025 EUR)` = round(`Costs Outpatient clinic (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Care (2025 EUR)` = round(`Costs Care (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Medication (2025 EUR)` = round(`Costs Medication (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Supplements (2025 EUR)` = round(`Costs Supplements (2025 CHF)` * var.conv.CHF.to.EUR, 2)
  ) %>%
  select(
    ID, `BMI category`, 
    `Costs GP (2025 CHF)`, `Costs GP (2025 EUR)`, 
    `Costs Medical assistant (2025 CHF)`, `Costs Medical assistant (2025 EUR)`, 
    `Costs Social worker (2025 CHF)`, `Costs Social worker (2025 EUR)`, 
    `Costs Physical therapist (2025 CHF)`, `Costs Physical therapist (2025 EUR)`, 
    `Costs Occupational therapist (2025 CHF)`, `Costs Occupational therapist (2025 EUR)`, 
    `Costs Logopaedist (2025 CHF)`, `Costs Logopaedist (2025 EUR)`, 
    `Costs Nutritionist (2025 CHF)`, `Costs Nutritionist (2025 EUR)`, 
    `Costs Complementary medicine (2025 CHF)`, `Costs Complementary medicine (2025 EUR)`, 
    `Costs Psychological care (2025 CHF)`, `Costs Psychological care (2025 EUR)`, 
    `Costs Company doctor (2025 CHF)`, `Costs Company doctor (2025 EUR)`, 
    `Costs Outpatient clinic (2025 CHF)`, `Costs Outpatient clinic (2025 EUR)`, 
    `Costs Care (2025 CHF)`, `Costs Care (2025 EUR)`, 
    `Costs Medication (2025 CHF)`, `Costs Medication (2025 EUR)`, 
    `Costs Supplements (2025 CHF)`, `Costs Supplements (2025 EUR)`, 
    `Direct medical costs (2025 CHF)`, `Direct medical costs (2025 EUR)`
  )

11.2.0.1 Descriptive data on healthcare usage iMCQ

Export table iMCQ descriptive

Code

# Export the data frame to a CSV file
write.csv(tbl.healthcare.consumption, "../tables/tbl.healthcare.consumption.csv", row.names = TRUE)

Code

df.imcq.descriptive <- rio::import("../tables/tbl.healthcare.consumption.csv")

df.imcq.descriptive %>% 
  flextable() %>%
  colformat_num(big.mark = "", digits = 0) %>%
  autofit() %>%
  bold(part = "header") %>%
  align(align = "center", part = "all") %>%
  line_spacing(space = 1.0) %>%
  fontsize(size = 8, part = "all") %>%
  font(fontname = "Times New Roman", part = "all") %>%
  print()

a flextable object.
col_keys: `V1`, `V2`, `V3`, `V4`, `V5` 
header has 1 row(s) 
body has 66 row(s) 
original dataset sample: 
  V1                  V2           V3             V4             V5
1 NA                  X.   Overweight        Obesity        Overall
2  1                           (N=17)         (N=35)         (N=52)
3  2                  GP                                           
4  3           Mean (SD) 0.941 (1.14)    1.09 (1.07)    1.04 (1.08)
5  4   Median [Min, Max]  0 [0, 3.00] 1.00 [0, 4.00] 1.00 [0, 4.00]

11.2.0.2 Summary table direct medical costs

Code

# Develop a summary table for direct medical costs with BMI category subgroups
vars.direct.med.costs <- c(
  "Costs GP (2025 CHF)", 
  "Costs Medical assistant (2025 CHF)", 
  "Costs Social worker (2025 CHF)", 
  "Costs Physical therapist (2025 CHF)", 
  "Costs Occupational therapist (2025 CHF)", 
  "Costs Logopaedist (2025 CHF)", 
  "Costs Nutritionist (2025 CHF)", 
  "Costs Complementary medicine (2025 CHF)", 
  "Costs Psychological care (2025 CHF)", 
  "Costs Company doctor (2025 CHF)", 
  "Costs Outpatient clinic (2025 CHF)", 
  "Costs Care (2025 CHF)", 
  "Costs Medication (2025 CHF)", 
  "Costs Supplements (2025 CHF)"
)

# Labels for cost categories
with(df.dir.med.costs, {
  label(`Costs GP (2025 CHF)`) <- "GP Costs (2025 CHF)"
  label(`Costs Medical assistant (2025 CHF)`) <- "Medical Assistant Costs (2025 CHF)"
  label(`Costs Social worker (2025 CHF)`) <- "Social Worker Costs (2025 CHF)"
  label(`Costs Physical therapist (2025 CHF)`) <- "Physical Therapist Costs (2025 CHF)"
  label(`Costs Occupational therapist (2025 CHF)`) <- "Occupational Therapist Costs (2025 CHF)"
  label(`Costs Logopaedist (2025 CHF)`) <- "Logopaedist Costs (2025 CHF)"
  label(`Costs Nutritionist (2025 CHF)`) <- "Nutritionist Costs (2025 CHF)"
  label(`Costs Complementary medicine (2025 CHF)`) <- "Complementary Medicine Costs (2025 CHF)"
  label(`Costs Psychological care (2025 CHF)`) <- "Psychological Care Costs (2025 CHF)"
  label(`Costs Company doctor (2025 CHF)`) <- "Company Doctor Costs (2025 CHF)"
  label(`Costs Outpatient clinic (2025 CHF)`) <- "Outpatient Clinic Costs (2025 CHF)"
  label(`Costs Care (2025 CHF)`) <- "Care Costs (2025 CHF)"
  label(`Costs Medication (2025 CHF)`) <- "Medication Costs (2025 CHF)"
  label(`Costs Supplements (2025 CHF)`) <- "Supplements Costs (2025 CHF)"
})

# Formula without empty spaces
formula.direct.med.costs <- as.formula(
  paste("~", paste0("`", vars.direct.med.costs, "`", collapse = " + "), "| `BMI category`")
)

# Table 1
tbl.dir.med.costs <- table1(
  formula.direct.med.costs,
  data = df.iMCQ.enriched,
  overall = "Overall",
  droplevels = FALSE)

tbl.dir.med.costs

	Overweight (N=17)	Obesity (N=35)	Overall (N=52)
Costs GP (2025 CHF)
Mean (SD)	146 (177)	168 (165)	161 (168)
Median [Min, Max]	0 [0, 465]	155 [0, 620]	155 [0, 620]
Costs Medical assistant (2025 CHF)
Mean (SD)	0 (0)	0 (0)	0 (0)
Median [Min, Max]	0 [0, 0]	0 [0, 0]	0 [0, 0]
Costs Social worker (2025 CHF)
Mean (SD)	0 (0)	2.35 (13.9)	1.58 (11.4)
Median [Min, Max]	0 [0, 0]	0 [0, 82.1]	0 [0, 82.1]
Costs Physical therapist (2025 CHF)
Mean (SD)	109 (199)	65.0 (118)	79.4 (149)
Median [Min, Max]	0 [0, 618]	0 [0, 470]	0 [0, 618]
Costs Occupational therapist (2025 CHF)
Mean (SD)	0 (0)	13.6 (59.2)	9.14 (48.7)
Median [Min, Max]	0 [0, 0]	0 [0, 317]	0 [0, 317]
Costs Logopaedist (2025 CHF)
Mean (SD)	0 (0)	0 (0)	0 (0)
Median [Min, Max]	0 [0, 0]	0 [0, 0]	0 [0, 0]
Costs Nutritionist (2025 CHF)
Mean (SD)	0 (0)	11.6 (40.3)	7.84 (33.4)
Median [Min, Max]	0 [0, 0]	0 [0, 192]	0 [0, 192]
Costs Complementary medicine (2025 CHF)
Mean (SD)	17.5 (72.1)	161 (466)	114 (389)
Median [Min, Max]	0 [0, 297]	0 [0, 2230]	0 [0, 2230]
Costs Psychological care (2025 CHF)
Mean (SD)	20.9 (86.4)	10.2 (60.2)	13.7 (69.1)
Median [Min, Max]	0 [0, 356]	0 [0, 356]	0 [0, 356]
Costs Company doctor (2025 CHF)
Mean (SD)	7.94 (32.7)	7.71 (45.6)	7.79 (41.5)
Median [Min, Max]	0 [0, 135]	0 [0, 270]	0 [0, 270]
Costs Outpatient clinic (2025 CHF)
Mean (SD)	27.4 (113)	42.0 (208)	37.2 (181)
Median [Min, Max]	0 [0, 465]	0 [0, 1210]	0 [0, 1210]
Costs Care (2025 CHF)
Mean (SD)	46.4 (191)	0 (0)	15.2 (109)
Median [Min, Max]	0 [0, 789]	0 [0, 0]	0 [0, 789]
Costs Medication (2025 CHF)
Mean (SD)	89.9 (176)	77.1 (84.9)	81.3 (121)
Median [Min, Max]	25.3 [0, 693]	58.5 [0, 365]	47.2 [0, 693]
Costs Supplements (2025 CHF)
Mean (SD)	8.36 (34.3)	17.8 (36.2)	14.7 (35.5)
Median [Min, Max]	0 [0, 142]	0 [0, 160]	0 [0, 160]

Code

# Export the data frame to a CSV file
write.csv(tbl.dir.med.costs, "../tables/tbl.dir.med.costs.csv", row.names = TRUE)

Code

# Valuation direct non-medical costs
#    `Family help`, 
#    `Family help kind`, 
#    `Family domestic (weeks)`, 
#    `Family personal (weeks)`, 
#    `Family practical (weeks)`, 
#    `Family domestic (hours)`, 
#    `Family personal (hours)`, 
#    `Family practical (hours)`
# Valuation transportation
#    `Means transport hospital`, 
#    `Distance from home to hospital`

df.iMCQ.enriched <- df.iMCQ.enriched %>%
  mutate(
    `Costs Informal Care (2025 CHF)` = case_when(
      `Family help` == "No" ~ 0,
      `Family help` == "Yes" & `Family help kind` == "Domestic care" ~ `Family domestic (weeks)` * `Family domestic (hours)` * fct.iMCQ.HC.pricing("Family care"), 
      `Family help` == "Yes" & `Family help kind` == "Personal care" ~ `Family personal (weeks)` * `Family personal (hours)` * fct.iMCQ.HC.pricing("Family care"),
      `Family help` == "Yes" & `Family help kind` == "Practical help" ~ `Family practical (weeks)` * `Family practical (hours)` * fct.iMCQ.HC.pricing("Family care"),
      TRUE ~ 0
    ),
    
    `Costs Transportation (2025 CHF)` = case_when(
      `Means transport hospital` == "Not applicable" ~ 0,
      `Means transport hospital` == "Pedestrian" ~ `Distance from home to hospital` * fct.iMCQ.HC.pricing("Pedestrian"),
      `Means transport hospital` == "Car" ~ `Distance from home to hospital` * fct.iMCQ.HC.pricing("Car"),
      `Means transport hospital` == "Public transport" ~ `Distance from home to hospital` * fct.iMCQ.HC.pricing("Public transport"),
      TRUE ~ 0
    ),
    `Costs Transportation (2025 CHF)` = round(`Costs Transportation (2025 CHF)`, 2))

# new dataset for direct non-medical costs
df.dir.non.med.costs <- df.iMCQ.enriched %>%
  mutate(
    # Calculate Direct non-medical costs (2025 CHF)
    `Direct non-medical costs (2025 CHF)` = rowSums(select(., 
      `Costs Informal Care (2025 CHF)`, 
      `Costs Transportation (2025 CHF)`), na.rm = TRUE),
    
    # Convert Costs into 2025 EUR
    `Costs Informal Care (2025 EUR)` = round(`Costs Informal Care (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs Transportation (2025 EUR)` = round(`Costs Transportation (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Direct non-medical costs (2025 EUR)` = round(`Direct non-medical costs (2025 CHF)` * var.conv.CHF.to.EUR, 2)
  ) %>%
  select(
    ID, `BMI category`, 
    `Costs Informal Care (2025 CHF)`, `Costs Informal Care (2025 EUR)`,
    `Costs Transportation (2025 CHF)`, `Costs Transportation (2025 EUR)`, 
    `Direct non-medical costs (2025 CHF)`, `Direct non-medical costs (2025 EUR)`)

11.2.0.3 Summary table direct non-medical costs

Code

# Develop a summary table for direct non-medical costs with BMI category subgroups
vars.direct.non.med.costs <- c(
  "Costs Informal Care (2025 CHF)", 
  "Costs Transportation (2025 CHF)",
  "Direct non-medical costs (2025 CHF)")

# Labels for cost categories
with(df.dir.non.med.costs, {
  label(`Costs Informal Care (2025 CHF)`) <- "Costs Informal Care (2025 CHF)"
  label(`Costs Transportation (2025 CHF)`)<- "Costs Transportation (2025 CHF)"
  label(`Direct non-medical costs (2025 CHF)`) <- "Direct non-medical costs (2025 CHF)"})

# Formula without empty spaces
formula.direct.non.med.costs <- as.formula(
  paste("~", paste0("`", vars.direct.non.med.costs, "`", collapse = " + "), "| `BMI category`"))

# Table 1
tbl.dir.non.med.costs <- table1(formula.direct.non.med.costs,
  data = df.dir.non.med.costs,
  overall = "Overall",
  droplevels = FALSE)

tbl.dir.non.med.costs

	Overweight (N=17)	Obesity (N=35)	Overall (N=52)
Costs Informal Care (2025 CHF)
Mean (SD)	0 (0)	28.2 (167)	19.0 (137)
Median [Min, Max]	0 [0, 0]	0 [0, 985]	0 [0, 985]
Costs Transportation (2025 CHF)
Mean (SD)	0.848 (2.82)	0.405 (1.94)	0.550 (2.25)
Median [Min, Max]	0 [0, 11.4]	0 [0, 11.4]	0 [0, 11.4]
Direct non-medical costs (2025 CHF)
Mean (SD)	0.848 (2.82)	28.6 (167)	19.5 (137)
Median [Min, Max]	0 [0, 11.4]	0 [0, 985]	0 [0, 985]

11.2.0.4 Export table direct non-medical costs

Code

# Export the data frame to a CSV file
write.csv(tbl.dir.non.med.costs, "../tables/tbl.dir.non.med.costs.csv", row.names = TRUE)

12 iPCQ data

Now the data on work productivity are analysed.

Code

df.iPCQ.coding <- rio::import("../coding/ipcq_ch_coding.csv")

Code

df.iPCQ.raw <- df.COI.filtered %>%
  select(general_id, general_t0_date, starts_with("ipcq"))

df.iPCQ.adapted <- df.iPCQ.raw %>% 
  mutate(
    # ID
    ID = general_id,
    # T0 year
    "Year T0" = lubridate::year(general_t0_date),
    # Paid employment
    "Paid employment" = factor(ipcq_ch_t02345_a6, levels = 0:1, labels = c("No", "Yes")),
    # Profession
    "Profession" = ipcq_ch_q1,
    # Working hours per week
    "Working (hours/week)" = as.numeric(ipcq_ch_q2, na.rm = TRUE),
    # Workdays per week
    "Workdays (week)" = as.numeric(ipcq_ch_q3, na.rm = TRUE),
    # Absenteeism last 3 months
    "Absenteeism" = factor(ifelse(is.na(ipcq_ch_q4_1), "No paid work", ipcq_ch_q4_1), 
                           levels = c(0, 1, "No paid work"), 
                           labels = c("No", "Yes", "No paid work")),
    # Absenteeism days last 3 months
    "Absenteeism (days)" = as.numeric(ipcq_ch_q4_2, na.rm = TRUE),
    # Absenteeism more than 3 months ago
    "Abstenteeism before 3 months" = factor(ipcq_ch_q5, levels = 0:1, labels = c("No", "Yes")),
    # Starting day of absenteeism more than 3 months ago
    "Abstenteeism before 3 months (day)" = ipcq_ch_q6,
    # Presenteeism
    "Presenteeism" = factor(ifelse(is.na(ipcq_ch_q7), "No paid work", ipcq_ch_q7), 
                            levels = c(0, 1, "No paid work"), 
                            labels = c("No", "Yes", "No paid work")),
    # Presenteeism days
    "Presenteeism (days)" = as.numeric(ipcq_ch_q8, na.rm = TRUE),
    # Presenteeism - workload fulfilment NAS0-10, 10 normal workload fulfilment
    "Presenteeism workload fulfilment" = as.numeric(ipcq_ch_q9, na.rm = TRUE),
    # Adjusted presenteeism workload fulfilment
    # adjusted variable: 0: no presenteeism and 10 maximal presenteeism instead of the other way around
    "Presenteeism workload fulfilment (adjusted)" = 1 - (`Presenteeism workload fulfilment` / 10),
    # Impairment of unpaid work
    "Impaired unpaid work" = factor(ipcq_ch_q10, levels = 0:1, labels = c("No", "Yes")),
    # Days of impaired unpaid work
    "Impaired unpaid work (days)" = as.numeric(ipcq_ch_q11, na.rm = TRUE),
    # Hours help by friends and family in impaired unpaid working days
    "Help during unpaid work (hours)" = as.numeric(ipcq_ch_q12, na.rm = TRUE)
  ) %>% 
  select(
    "ID", "Year T0", "Paid employment", "Profession", 
    "Working (hours/week)", "Workdays (week)", "Absenteeism", 
    "Absenteeism (days)", "Abstenteeism before 3 months", 
    "Abstenteeism before 3 months (day)", "Presenteeism", 
    "Presenteeism (days)", "Presenteeism workload fulfilment", 
    "Presenteeism workload fulfilment (adjusted)",
    "Impaired unpaid work", "Impaired unpaid work (days)", 
    "Help during unpaid work (hours)"
  ) %>% 
  mutate(
    across("Working (hours/week)":"Workdays (week)", ~replace_na(.x, 0)),
    across("Impaired unpaid work (days)":"Help during unpaid work (hours)", ~replace_na(.x, 0)),
    `Absenteeism (days)` = ifelse(is.na(`Absenteeism (days)`), 0, `Absenteeism (days)`),
    `Presenteeism (days)` = ifelse(is.na(`Presenteeism (days)`), 0, `Presenteeism (days)`)
  )




# new variable with reference year of SL and non SL prices
df.iPCQ.adapted$`Price year` <- 2023

Check the selected variables for missing values.

Code

DataExplorer::plot_missing(df.iPCQ.adapted, title = "Missing values iPCQ Switzerland")

Code

mice::md.pattern(df.iPCQ.adapted, rotate.names = TRUE)

   ID Year T0 Paid employment Profession Working (hours/week) Workdays (week)
5   1       1               1          1                    1               1
9   1       1               1          1                    1               1
21  1       1               1          1                    1               1
1   1       1               1          1                    1               1
1   1       1               1          1                    1               1
15  1       1               1          1                    1               1
    0       0               0          0                    0               0
   Absenteeism Absenteeism (days) Presenteeism Presenteeism (days)
5            1                  1            1                   1
9            1                  1            1                   1
21           1                  1            1                   1
1            1                  1            1                   1
1            1                  1            1                   1
15           1                  1            1                   1
             0                  0            0                   0
   Impaired unpaid work Impaired unpaid work (days)
5                     1                           1
9                     1                           1
21                    1                           1
1                     1                           1
1                     1                           1
15                    1                           1
                      0                           0
   Help during unpaid work (hours) Price year Presenteeism workload fulfilment
5                                1          1                                1
9                                1          1                                1
21                               1          1                                1
1                                1          1                                0
1                                1          1                                0
15                               1          1                                0
                                 0          0                               17
   Presenteeism workload fulfilment (adjusted) Abstenteeism before 3 months
5                                            1                            1
9                                            1                            1
21                                           1                            0
1                                            0                            1
1                                            0                            1
15                                           0                            0
                                            17                           36
   Abstenteeism before 3 months (day)    
5                                   1   0
9                                   0   1
21                                  0   2
1                                   1   2
1                                   0   3
15                                  0   4
                                   46 116

Code

par(mfrow = c(3, 3))  
for (var in names(df.iPCQ.adapted)) {
  if (is.numeric(df.iPCQ.adapted[[var]])) {
    barplot(table(df.iPCQ.adapted[[var]]), main = paste("", var))
  }
}

Code

# not a normal distribution?

13 Recalculation of costs reported in the iPCQ

The data on productivity need to be monetised as well in 2025 CHF and 2025 Belgium Euro for the calculation of indirect costs.

13.1 iPCQ valuation

The questionnaires covered only the last 3 months until baseline assessment.

Short-term sick leave (up to 6 months) was covered in this cost-of-illness study (van den Hout, 2010).

Societal perspective:

Costs of absenteeism were valued using the human capital approach (van den Hout, 2010).
It “considers the patient´s hours of productivity that are lost and calculates productivity costs as the product of those total lost hours with the hourly wage. Every hour not worked is an hour lost, possibly until the patient’s retirement age” (van den Hout, 2010).

The hourly wage is calculated using the following formula:

Hourly wage = [(Annual income before tax)/[(52*5) - (paid vacation days + public holidays)]/ hours of standard workday

In Switzerland, the following data are used:

average annual income before tax in Switzerland (latest available data from 2023):
- 2023 CHF 94’507
hours of standard workday: 8.4 hours
paid vacation days: 25 days
paid public holidays: 8 days (for Canton of Bern)

Bibliography:

OECD (2024), Average wages (indicator). doi: 10.1787/cc3e1387-en (Accessed on 19 February 2024)
Office FS. Wages, income from employment and labour costs: Schweizer Eidgenossenschaft; [Available from: https://www.bfs.admin.ch/bfs/en/home/statistics/work-income/wages-income-employment-labour-costs.html
Eidgenossenschaft S. Vacation, public holidays and absences from work [Available from: https://www.ch.ch/en/work/working-hours/vacation--public-holidays-and-absences-from-work/#further-information-and-contacts.
Employment LaSd. Working time and rest periods: Federal public service; [Available from: https://employment.belgium.be/en/themes/international/posting/working-conditions-be-respected-case-posting-belgium/working-time-and.
Employment LaSd. Minimum paid annual holidays: Federal public service; [Available from: https://employment.belgium.be/en/themes/international/posting/working-conditions-be-respected-case-posting-belgium/minimum-paid.
van den Hout WB. The value of productivity: human-capital versus friction-cost method Annals of the Rheumatic Diseases 2010;69:i89-i91.

Bibliography used to recalculate costs in other currency and price year:

Shemilt I, James T, Marcello M. A web-based tool for adjusting costs to a specific target currency and price year. Evidence & Policy. 2010;6(1):51-9. –> http://eppi.ioe.ac.uk/costconversion/default.aspx

Presenteeism is valued using the Osterhaus method with the following formula:

Reduced monthly presenteism days = (DWsx) * PRODsx* ED

DWsx: days worked with symptoms (ipcq_ch_q8)
PRODsx: productivity when working with symptoms (ipcq_ch_q9)/10
ED: daily earnings (calculated in valuation of absenteeism section) –> variable: daily_wage_ch

The following part calculated the absenteeism costs in 2023 CHF.

Code

# Absenteeism
# Average annual income in Switzerland in 2023 CHF (2023 US$ 83'332)
average_annual_income_CHF <- 94507
workday_hours <- 8.4
paid_vacation <- 25
paid_public_holidays <- 8

# Calculation of hourly wage based on formula used in Lutz et al. 2022 + making the relevant variables numeric values
hourly_wage_ch <- as.numeric(round((((average_annual_income_CHF)/((52*5)-(paid_vacation+paid_public_holidays)))/workday_hours),2))
daily_wage_ch <- as.numeric(round(hourly_wage_ch*workday_hours, 2))

# Valuation of absenteeism days (ipcq_ch_q4_2) in a new column called absenteeism_costs_CHF
df.iPCQ.adapted$`Costs absenteeism (CHF)` <- ifelse(is.na(df.iPCQ.adapted$`Absenteeism (days)`), 0, df.iPCQ.adapted$`Absenteeism (days)` * daily_wage_ch)

Code

# Presenteism
df.iPCQ.adapted$`Costs presenteeism (CHF)` <- ifelse(
  is.na(df.iPCQ.adapted$`Presenteeism (days)`) | is.na(df.iPCQ.adapted$`Presenteeism workload fulfilment`),
  0,
  (df.iPCQ.adapted$`Presenteeism (days)` * df.iPCQ.adapted$`Presenteeism workload fulfilment (adjusted)`) * daily_wage_ch
)

Unpaid work: Valuation according to xy with CHF 27.72 (2023 CHF) and €18.81 (2023 Belgium €) (acc. to Koopmanschap et al. 2008)

Code

houry_wage_unpaid_work_CHF <- 27.72
# Unpaid work
df.iPCQ.adapted$`Costs impaired unpaid work (CHF)` <- ifelse(is.na(df.iPCQ.adapted$`Impaired unpaid work (days)`)| is.na(df.iPCQ.adapted$`Help during unpaid work (hours)`),
                                                             0,
                                                             (df.iPCQ.adapted$`Impaired unpaid work (days)`*df.iPCQ.adapted$`Help during unpaid work (hours)`)*houry_wage_unpaid_work_CHF)

Indirect costs are the sum of costs for absenteeism and presenteeism (reported in 2023 CHF).

Code

df.iPCQ.adapted <- df.iPCQ.adapted %>% 
  mutate(
    `Indirect costs (CHF)` = `Costs absenteeism (CHF)` + `Costs presenteeism (CHF)`+ `Costs impaired unpaid work (CHF)`)

Now the indirect cost data are recalculated to 2025 CHF.

Code

# Convert CHF from reference year to 2025 CHF - variable "Price (2025 CHF)"
df.iPCQ.adapted <- df.iPCQ.adapted %>%
  rowwise() %>%
  mutate(
    `Indirect costs (2025 CHF)` = round(convert_price_to_2025(`Indirect costs (CHF)`, `Price year`), 2),
    `Costs absenteeism (2025 CHF)` = round(convert_price_to_2025(`Costs absenteeism (CHF)`, `Price year`), 2),
    `Costs presenteeism (2025 CHF)` = round(convert_price_to_2025(`Costs presenteeism (CHF)`, `Price year`), 2),
    `Costs impaired unpaid work (2025 CHF)` = round(convert_price_to_2025(`Costs impaired unpaid work (CHF)`, `Price year`), 2)
  ) %>%
  ungroup()

Code

# Convert Price (2025 CHF) to Price (2025 EUR)
df.iPCQ.adapted <- df.iPCQ.adapted %>%
  mutate(
    `Indirect costs (2025 EUR)` = round(`Indirect costs (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs absenteeism (2025 EUR)` = round(`Costs absenteeism (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs presenteeism (2025 EUR)` = round(`Costs presenteeism (2025 CHF)` * var.conv.CHF.to.EUR, 2),
    `Costs impaired unpaid work (2025 EUR)` = round(`Costs impaired unpaid work (2025 CHF)` * var.conv.CHF.to.EUR, 2)
  )

Code

# Reshape the dataset to long format
df.iPCQ.long <- df.iPCQ.adapted %>%
  pivot_longer(
    cols = c(`Absenteeism (days)`, `Presenteeism (days)`, `Presenteeism workload fulfilment`, 
             `Impaired unpaid work (days)`, `Help during unpaid work (hours)`, 
             `Costs absenteeism (2025 CHF)`, `Costs presenteeism (2025 CHF)`, `Costs impaired unpaid work (2025 CHF)`, `Indirect costs (2025 CHF)`),
    names_to = "Variable",
    values_to = "Value"
  )

# Separate data for non-cost variables
df.iPCQ.noncost <- df.iPCQ.long %>%
  filter(Variable %in% c("Absenteeism (days)", "Presenteeism (days)", 
                         "Presenteeism workload fulfilment", "Impaired unpaid work (days)", 
                         "Help during unpaid work (hours)"))

# Separate data for cost variables
df.iPCQ.cost <- df.iPCQ.long %>%
  filter(Variable %in% c("Costs absenteeism (2025 CHF)", "Costs presenteeism (2025 CHF)", "Costs impaired unpaid work (2025 CHF)", "Indirect costs (2025 CHF)"))

# Boxplot for non-cost variables
ggplot(df.iPCQ.noncost, aes(x = Variable, y = Value)) +
  geom_boxplot() +
  labs(title = "Boxplot of iPCQ Data Without Costs",
       x = "Variables",
       y = "Values") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Code

# Boxplot for cost variables
ggplot(df.iPCQ.cost, aes(x = Variable, y = Value)) +
  geom_boxplot() +
  labs(title = "Boxplot of iPCQ Data With Costs",
       x = "Variables",
       y = "Values (in 2025 CHF)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

A summary table reporting all relevant iPCQ data and indirect cost data (including absenteeism and presenteeism costs).

Code

# Format labels for table1
with(df.iPCQ.adapted, {
  label(`Working (hours/week)`) <- "Working hours per week"
  label(`Workdays (week)`) <- "Workdays per week"
  label(Absenteeism) <- "Absenteeism (yes/no)"
  label(`Absenteeism (days)`) <- "Absenteeism days in the last 3 months"
  label(Presenteeism) <- "Presenteeism (yes/no)"
  label(`Presenteeism (days)`) <- "Presenteeism days in the last 3 months"
  label(`Presenteeism workload fulfilment`) <- "Presenteeism workload fulfilment"
  label(`Impaired unpaid work`) <- "Impairments during unpaid work"
  label(`Impaired unpaid work (days)`) <- "Days with impaired unpaid work"
  label(`Help during unpaid work (hours)`) <- "Hours of help during unpaid work"
})


# Add the variables Sex, Age, BMI, BMI category and Duration of LBP to the dataset df.iPCQ.adapted
df.iPCQ.adapted <- df.iPCQ.adapted %>%
  left_join(df.COI.demo.adapted.2 %>%
              select(ID, Sex, Age, BMI, `BMI category`, `Duration of LBP (months)`), 
            by = "ID") %>% 
  relocate(Sex, Age, BMI, `BMI category`, `Duration of LBP (months)`, .after = ID)

# Create summary table stratified by BMI category
tbl.iPCQ.summary <- table1(~ `Working (hours/week)` + 
                              `Workdays (week)` + 
                              Absenteeism + 
                              `Absenteeism (days)` + 
                              `Presenteeism` + 
                              `Presenteeism (days)` + 
                              `Presenteeism workload fulfilment` + 
                              `Impaired unpaid work` + 
                              `Impaired unpaid work (days)` + 
                              `Help during unpaid work (hours)` | `BMI category`, 
                            data = df.iPCQ.adapted, 
                            overall = "Overall", 
                            droplevels = FALSE)

tbl.iPCQ.summary

	Overweight (N=17)	Obesity (N=35)	Overall (N=52)
Working (hours/week)
Mean (SD)	31.6 (13.4)	31.6 (13.7)	31.6 (13.5)
Median [Min, Max]	36.0 [0, 43.0]	35.0 [0, 50.0]	35.0 [0, 50.0]
Workdays (week)
Mean (SD)	3.65 (1.61)	3.99 (1.55)	3.88 (1.56)
Median [Min, Max]	4.00 [0, 5.00]	5.00 [0, 6.00]	4.25 [0, 6.00]
Absenteeism
No	10 (58.8%)	21 (60.0%)	31 (59.6%)
Yes	5 (29.4%)	11 (31.4%)	16 (30.8%)
No paid work	2 (11.8%)	3 (8.6%)	5 (9.6%)
Absenteeism (days)
Mean (SD)	3.76 (11.3)	1.66 (4.55)	2.35 (7.42)
Median [Min, Max]	0 [0, 47.0]	0 [0, 25.0]	0 [0, 47.0]
Presenteeism
No	3 (17.6%)	9 (25.7%)	12 (23.1%)
Yes	12 (70.6%)	23 (65.7%)	35 (67.3%)
No paid work	2 (11.8%)	3 (8.6%)	5 (9.6%)
Presenteeism (days)
Mean (SD)	8.06 (14.2)	7.77 (10.9)	7.87 (11.9)
Median [Min, Max]	3.00 [0, 54.0]	3.00 [0, 50.0]	3.00 [0, 54.0]
Presenteeism workload fulfilment
Mean (SD)	7.92 (1.83)	7.43 (2.02)	7.60 (1.94)
Median [Min, Max]	8.50 [5.00, 10.0]	8.00 [3.00, 10.0]	8.00 [3.00, 10.0]
Missing	5 (29.4%)	12 (34.3%)	17 (32.7%)
Impaired unpaid work
No	13 (76.5%)	22 (62.9%)	35 (67.3%)
Yes	4 (23.5%)	13 (37.1%)	17 (32.7%)
Impaired unpaid work (days)
Mean (SD)	2.88 (7.30)	7.51 (18.0)	6.00 (15.4)
Median [Min, Max]	0 [0, 24.0]	0 [0, 90.0]	0 [0, 90.0]
Help during unpaid work (hours)
Mean (SD)	0.765 (2.19)	3.14 (8.39)	2.37 (7.05)
Median [Min, Max]	0 [0, 9.00]	0 [0, 40.0]	0 [0, 40.0]

Code

# Export the data frame to a CSV file
write.csv(tbl.iPCQ.summary, "../tables/tbl.iPCQ.summary.csv", row.names = TRUE)
write.csv(df.iPCQ.adapted, "../exported_datasets/df.iPCQ.adapted.csv", row.names= TRUE)

14 Bootstrapping of the data

To adress uncertainty, the cost data were bootstrapped. This enabled a reporting of cost values with 95% CI values in a disaggregated form per individual and as a mean overall value.

Code

library(boot)
set.seed(123)
fct.boot.dir.med <- function(data, i) {
  df <- data[i, ]  # Bootstrap sample

  # Group by BMI category and calculate the mean for 'Costs absenteeism (2025 CHF)'
  means_by_category <- df %>%
    group_by(`BMI category`) %>%
    summarise(mean_cost = mean(`Direct medical costs (2025 CHF)`, na.rm = TRUE), median_cost = median(`Direct medical costs (2025 CHF)`, na.rm = TRUE))

  # Calculate the total mean
  total_mean <- mean(df$`Direct medical costs (2025 CHF)`, na.rm = TRUE)
  
  # Calculate the total median
  total_median <- median(df$`Direct medical costs (2025 CHF)`, na.rm = TRUE)

  # Return both category means and the total mean
  return(c(means_by_category$mean_cost, means_by_category$median_cost,total_mean, total_median))
}


# Bootstrapping for direct medical costs with 5000 iterations (Costs in 2025 CHF)
set.seed(123)
boot.dir.med <- boot(df.dir.med.costs, fct.boot.dir.med, R = 5000)

# Extract original bootstrap estimates (Costs in 2025 CHF)
t1_dir_med_mean_ov <- mean(boot.dir.med$t[, 1])  # t1: Mean cost for overweight
t2_dir_med_mean_obe <- mean(boot.dir.med$t[, 2])  # t2: Mean cost for obesity
t3_dir_med_median_ov <- mean(boot.dir.med$t[, 3])  # t3: Median cost for overweight
t4_dir_med_median_obe <- mean(boot.dir.med$t[, 4])  # t4: Median cost for obesity
t5_dir_med_total_mean <- mean(boot.dir.med$t[, 5])  # t5: Total mean cost
t6_dir_med_total_median <- mean(boot.dir.med$t[, 6])  # t6: Total median cost

# CI calculation formula: 2*mean - quantile
ci_formula_dir_med <- function(orig_mean, boot_samples) {
  lower <- 2 * orig_mean - quantile(boot_samples, probs = 0.975, na.rm = TRUE)
  upper <- 2 * orig_mean - quantile(boot_samples, probs = 0.025, na.rm = TRUE)
  return(c(lower, upper))
}

# Calculate CIs for mean-based stats
ci_dir_med_mean_ov      <- ci_formula_dir_med(t1_dir_med_mean_ov, boot.dir.med$t[, 1])
ci_dir_med_mean_obe     <- ci_formula_dir_med(t2_dir_med_mean_obe, boot.dir.med$t[, 2])
ci_dir_med_total_mean   <- ci_formula_dir_med(t5_dir_med_total_mean, boot.dir.med$t[, 5])

# Calculate IRQ for median
iqr_dir_med_median_ov <- quantile(boot.dir.med$t[, 3], probs = c(0.25, 0.75))  # Median Overweight
iqr_dir_med_median_obe <- quantile(boot.dir.med$t[, 4], probs = c(0.25, 0.75))  # Median Obesity
iqr_dir_med_total_median <- quantile(boot.dir.med$t[, 6], probs = c(0.25, 0.75))  # Total Median

# Create the results DataFrame (Costs in 2025 CHF)
df.boot.direct.medical.costs <- data.frame(
  Statistic = c(
    "Mean Direct medical Cost (Overweight)",
    "Mean Direct medical Cost (Obesity)",
    "Median Direct medical Cost (Overweight)",
    "Median Direct medical Cost (Obesity)",
    "Total Mean Direct medical Cost",
    "Total Median Direct medical Cost"
  ),
  Value = c(
    t1_dir_med_mean_ov, t2_dir_med_mean_obe,
    t3_dir_med_median_ov, t4_dir_med_median_obe,
    t5_dir_med_total_mean, t6_dir_med_total_median
  ),
  Lower = c(
    ci_dir_med_mean_ov[1], ci_dir_med_mean_obe[1],
    iqr_dir_med_median_ov[1], iqr_dir_med_median_obe[1],
    ci_dir_med_total_mean[1], iqr_dir_med_total_median[1]
  ),
  Upper = c(
    ci_dir_med_mean_ov[2], ci_dir_med_mean_obe[2],
    iqr_dir_med_median_ov[2], iqr_dir_med_median_obe[2],
    ci_dir_med_total_mean[2], iqr_dir_med_total_median[2]
  ),
  Interval_Type = c("95% CI", "95% CI", "IQR", "IQR", "95% CI", "IQR")
) %>%
  mutate(
    Value = round(Value, 2),
    Lower = round(Lower, 2),
    Upper = round(Upper, 2))

# View the results
print(df.boot.direct.medical.costs)

                                Statistic  Value  Lower  Upper Interval_Type
1   Mean Direct medical Cost (Overweight) 474.61 269.93 667.10        95% CI
2      Mean Direct medical Cost (Obesity) 577.02 355.51 768.09        95% CI
3 Median Direct medical Cost (Overweight) 438.26 335.38 540.43           IQR
4    Median Direct medical Cost (Obesity) 432.69 362.31 529.39           IQR
5          Total Mean Direct medical Cost 543.69 382.80 686.33        95% CI
6        Total Median Direct medical Cost 442.44 362.31 529.39           IQR

Code

set.seed(123)

# Bootstrap function
fct.boot.dir.non.med <- function(data, i) {
  df <- data[i, ]  # Bootstrap sample

  # Group by BMI category and calculate mean & median for 'Direct non-medical costs (2025 CHF)'
  means_by_category <- df %>%
    group_by(`BMI category`) %>%
    summarise(
      mean_cost = mean(`Direct non-medical costs (2025 CHF)`, na.rm = TRUE),
      median_cost = median(`Direct non-medical costs (2025 CHF)`, na.rm = TRUE)
    )

  # Calculate total mean & median
  total_mean <- mean(df$`Direct non-medical costs (2025 CHF)`, na.rm = TRUE)
  total_median <- median(df$`Direct non-medical costs (2025 CHF)`, na.rm = TRUE)

  # Return stats: mean overweight, mean obesity, median overweight, median obesity, total mean, total median
  return(c(means_by_category$mean_cost, means_by_category$median_cost, total_mean, total_median))
}

# Run bootstrap
set.seed(123)
boot.dir.non.med <- boot(df.dir.non.med.costs, fct.boot.dir.non.med, R = 5000)

# Extract original bootstrap estimates
t1_dir_non_med_mean_ov      <- mean(boot.dir.non.med$t[, 1])  # Mean overweight
t2_dir_non_med_mean_obe     <- mean(boot.dir.non.med$t[, 2])  # Mean obesity
t3_dir_non_med_median_ov    <- mean(boot.dir.non.med$t[, 3])  # Median overweight
t4_dir_non_med_median_obe   <- mean(boot.dir.non.med$t[, 4])  # Median obesity
t5_dir_non_med_total_mean   <- mean(boot.dir.non.med$t[, 5])  # Total mean
t6_dir_non_med_total_median <- mean(boot.dir.non.med$t[, 6])  # Total median

# CI formula: 2*mean - quantile
ci_formula_dir_nonmed <- function(orig_mean, boot_samples) {
  lower <- 2 * orig_mean - quantile(boot_samples, probs = 0.975, na.rm = TRUE)
  upper <- 2 * orig_mean - quantile(boot_samples, probs = 0.025, na.rm = TRUE)
  lower <- max(lower, 0)  # Prevent negative lower bound
  return(c(lower, upper))
}


# Calculate CIs for mean-based stats
ci_dir_non_med_mean_ov      <- ci_formula_dir_nonmed(t1_dir_non_med_mean_ov, boot.dir.non.med$t[, 1])
ci_dir_non_med_mean_obe     <- ci_formula_dir_nonmed(t2_dir_non_med_mean_obe, boot.dir.non.med$t[, 2])
ci_dir_non_med_total_mean   <- ci_formula_dir_nonmed(t5_dir_non_med_total_mean, boot.dir.non.med$t[, 5])

# IQR for median-based stats
iqr_dir_non_med_median_ov   <- quantile(boot.dir.non.med$t[, 3], probs = c(0.25, 0.75), na.rm = TRUE)
iqr_dir_non_med_median_obe  <- quantile(boot.dir.non.med$t[, 4], probs = c(0.25, 0.75), na.rm = TRUE)
iqr_dir_non_med_total_median <- quantile(boot.dir.non.med$t[, 6], probs = c(0.25, 0.75), na.rm = TRUE)

# Create results DataFrame
df.boot.direct.non.medical.costs <- data.frame(
  Statistic = c(
    "Mean Direct non-medical Cost (Overweight)",
    "Mean Direct non-medical Cost (Obesity)",
    "Median Direct non-medical Cost (Overweight)",
    "Median Direct non-medical Cost (Obesity)",
    "Total Mean Direct non-medical Cost",
    "Total Median Direct non-medical Cost"
  ),
  Value = c(
    t1_dir_non_med_mean_ov, t2_dir_non_med_mean_obe,
    t3_dir_non_med_median_ov, t4_dir_non_med_median_obe,
    t5_dir_non_med_total_mean, t6_dir_non_med_total_median
  ),
  Lower = c(
    ci_dir_non_med_mean_ov[1], ci_dir_non_med_mean_obe[1],
    iqr_dir_non_med_median_ov[1], iqr_dir_non_med_median_obe[1],
    ci_dir_non_med_total_mean[1], iqr_dir_non_med_total_median[1]
  ),
  Upper = c(
    ci_dir_non_med_mean_ov[2], ci_dir_non_med_mean_obe[2],
    iqr_dir_non_med_median_ov[2], iqr_dir_non_med_median_obe[2],
    ci_dir_non_med_total_mean[2], iqr_dir_non_med_total_median[2]
  ),
  Interval_Type = c("95% CI", "95% CI", "IQR", "IQR", "95% CI", "IQR")
) %>%
  mutate(
    Value = round(Value, 2),
    Lower = pmax(round(Lower, 2), 0),
    Upper = round(Upper, 2))

# Print results
print(df.boot.direct.non.medical.costs)

                                    Statistic Value Lower Upper Interval_Type
1   Mean Direct non-medical Cost (Overweight)  0.85     0  1.70        95% CI
2      Mean Direct non-medical Cost (Obesity) 28.40     0 56.76        95% CI
3 Median Direct non-medical Cost (Overweight)  0.00     0  0.00           IQR
4    Median Direct non-medical Cost (Obesity)  0.00     0  0.00           IQR
5          Total Mean Direct non-medical Cost 19.39     0 38.63        95% CI
6        Total Median Direct non-medical Cost  0.00     0  0.00           IQR

Code

# Export the data frame to a CSV file
write.csv(df.boot.direct.medical.costs, "../exported_datasets/tbl.boot.dir.medical.csv", row.names= TRUE)
write.csv(df.boot.direct.non.medical.costs, "../exported_datasets/tbl.boot.dir.non.medical.csv", row.names= TRUE)

This includes the Absenteeism and Presenteeism costs

14.0.1 Absenteeism

Code

set.seed(123)

# Bootstrap function
fct.boot.abs <- function(data, i) {
  df <- data[i, ]  # Bootstrap sample
  
  # Group by BMI category and calculate mean & median for 'Costs absenteeism (2025 CHF)'
  means_by_category <- df %>%
    group_by(`BMI category`) %>%
    summarise(
      mean_cost = mean(`Costs absenteeism (2025 CHF)`, na.rm = TRUE), 
      median_cost = median(`Costs absenteeism (2025 CHF)`, na.rm = TRUE)
    )
  
  # Calculate total mean & median
  total_mean <- mean(df$`Costs absenteeism (2025 CHF)`, na.rm = TRUE)
  total_median <- median(df$`Costs absenteeism (2025 CHF)`, na.rm = TRUE)
  
  # Return: mean overweight, mean obesity, median overweight, median obesity, total mean, total median
  return(c(means_by_category$mean_cost, means_by_category$median_cost, total_mean, total_median))
}

# Run bootstrap
set.seed(123)
boot.abs <- boot(df.iPCQ.adapted, fct.boot.abs, R = 5000)

# Extract original bootstrap estimates
t1_abs_mean_ov      <- mean(boot.abs$t[, 1])  # Mean overweight
t2_abs_mean_obe     <- mean(boot.abs$t[, 2])  # Mean obesity
t3_abs_median_ov    <- mean(boot.abs$t[, 3])  # Median overweight
t4_abs_median_obe   <- mean(boot.abs$t[, 4])  # Median obesity
t5_abs_total_mean   <- mean(boot.abs$t[, 5])  # Total mean
t6_abs_total_median <- mean(boot.abs$t[, 6])  # Total median

# CI formula for absenteeism: 2*mean - quantile, lower bound clipped at 0
ci_formula_abs <- function(orig_mean, boot_samples) {
  lower <- 2 * orig_mean - quantile(boot_samples, probs = 0.975, na.rm = TRUE)
  upper <- 2 * orig_mean - quantile(boot_samples, probs = 0.025, na.rm = TRUE)
  return(c(lower, upper))
}

# Calculate CIs for mean-based stats
ci_abs_mean_ov    <- ci_formula_abs(t1_abs_mean_ov, boot.abs$t[, 1])
ci_abs_mean_obe   <- ci_formula_abs(t2_abs_mean_obe, boot.abs$t[, 2])
ci_abs_total_mean <- ci_formula_abs(t5_abs_total_mean, boot.abs$t[, 5])

# IQR for median-based stats
iqr_abs_median_ov   <- quantile(boot.abs$t[, 3], probs = c(0.25, 0.75), na.rm = TRUE)
iqr_abs_median_obe  <- quantile(boot.abs$t[, 4], probs = c(0.25, 0.75), na.rm = TRUE)
iqr_abs_total_median <- quantile(boot.abs$t[, 6], probs = c(0.25, 0.75), na.rm = TRUE)

# Create results DataFrame
df.boot.indirect.abs <- data.frame(
  Statistic = c(
    "Mean Absenteeism Cost (Overweight)",
    "Mean Absenteeism Cost (Obesity)",
    "Median Absenteeism Cost (Overweight)",
    "Median Absenteeism Cost (Obesity)",
    "Total Mean Absenteeism Cost",
    "Total Median Absenteeism Cost"
  ),
  Value = c(
    t1_abs_mean_ov, t2_abs_mean_obe,
    t3_abs_median_ov, t4_abs_median_obe,
    t5_abs_total_mean, t6_abs_total_median
  ),
  Lower = c(
    ci_abs_mean_ov[1], ci_abs_mean_obe[1],
    iqr_abs_median_ov[1], iqr_abs_median_obe[1],
    ci_abs_total_mean[1], iqr_abs_total_median[1]
  ),
  Upper = c(
    ci_abs_mean_ov[2], ci_abs_mean_obe[2],
    iqr_abs_median_ov[2], iqr_abs_median_obe[2],
    ci_abs_total_mean[2], iqr_abs_total_median[2]
  ),
  Interval_Type = c("95% CI", "95% CI", "IQR", "IQR", "95% CI", "IQR")
) %>%
  mutate(
    Value = round(Value, 2),
    Lower = pmax(round(Lower, 2), 0),  # Ensure no negative lower bound
    Upper = round(Upper, 2)
  )

print(df.boot.indirect.abs)

                             Statistic   Value Lower   Upper Interval_Type
1   Mean Absenteeism Cost (Overweight) 1602.22  0.00 3062.50        95% CI
2      Mean Absenteeism Cost (Obesity)  705.91  0.00 1206.28        95% CI
3 Median Absenteeism Cost (Overweight)   34.58  0.00    0.00           IQR
4    Median Absenteeism Cost (Obesity)    5.54  0.00    0.00           IQR
5          Total Mean Absenteeism Cost 1000.39  2.63 1681.40        95% CI
6        Total Median Absenteeism Cost    0.81  0.00    0.00           IQR

14.0.2 Presenteeism

Code

set.seed(123)
fct.boot.pres <- function(data, i) {
  df <- data[i, ]  # Bootstrap sample

  # Group by BMI category and calculate the mean for 'Costs absenteeism (2025 CHF)'
  means_by_category <- df %>%
    group_by(`BMI category`) %>%
    summarise(mean_cost = mean(`Costs presenteeism (2025 CHF)`, na.rm = TRUE), median_cost = median(`Costs presenteeism (2025 CHF)`, na.rm = TRUE))

  # Calculate the total mean
  total_mean <- mean(df$`Costs presenteeism (2025 CHF)`, na.rm = TRUE)
  
  # Calculate the total median
  total_median <- median(df$`Costs presenteeism (2025 CHF)`, na.rm = TRUE)

  # Return both category means and the total mean
  return(c(means_by_category$mean_cost, means_by_category$median_cost,total_mean, total_median))
}


# Bootstrapping for presenteeism with 5000 iterations (Costs in 2025 CHF)
set.seed(123)
boot.pres <- boot(df.iPCQ.adapted, fct.boot.pres, R = 5000)

# Function to calculate CI and ensure lower bound >= 0
ci_formula_pres <- function(orig_mean, boot_samples) {
  lower <- 2 * orig_mean - quantile(boot_samples, probs = 0.975, na.rm = TRUE)
  upper <- 2 * orig_mean - quantile(boot_samples, probs = 0.025, na.rm = TRUE)
  return(c(lower, upper))
}

# Correct calls to CI function
ci_pres_mean_ov <- ci_formula_pres(
  orig_mean = mean(boot.pres$t[, 1], na.rm = TRUE),   # original estimate
  boot_samples = boot.pres$t[, 1]                     # bootstrap distribution
)

ci_pres_mean_obe <- ci_formula_pres(
  orig_mean = mean(boot.pres$t[, 2], na.rm = TRUE),
  boot_samples = boot.pres$t[, 2]
)

ci_pres_total_mean <- ci_formula_pres(
  orig_mean = mean(boot.pres$t[, 5], na.rm = TRUE),
  boot_samples = boot.pres$t[, 5]
)

# IQR for median values (unchanged)
iqr_pres_median_ov   <- quantile(boot.pres$t[, 3], probs = c(0.25, 0.75))
iqr_pres_median_obe  <- quantile(boot.pres$t[, 4], probs = c(0.25, 0.75))
iqr_pres_total_median <- quantile(boot.pres$t[, 6], probs = c(0.25, 0.75))

# Extract original bootstrap estimates
t1_pres_mean_ov       <- mean(boot.pres$t[, 1])
t2_pres_mean_obe      <- mean(boot.pres$t[, 2])
t3_pres_median_ov     <- mean(boot.pres$t[, 3])
t4_pres_median_obe    <- mean(boot.pres$t[, 4])
t5_pres_total_mean    <- mean(boot.pres$t[, 5])
t6_pres_total_median  <- mean(boot.pres$t[, 6])

# Create DataFrame
df.boot.indirect.pres <- data.frame(
  Statistic = c(
    "Mean Presenteeism Cost (Overweight)", 
    "Mean Presenteeism Cost (Obesity)", 
    "Median Presenteeism Cost (Overweight)", 
    "Median Presenteeism Cost (Obesity)", 
    "Total Mean Presenteeism Cost", 
    "Total Median Presenteeism Cost"
  ),
  Value = c(
    t1_pres_mean_ov, t2_pres_mean_obe, 
    t3_pres_median_ov, t4_pres_median_obe, 
    t5_pres_total_mean, t6_pres_total_median
  ),
  Lower = c(
    ci_pres_mean_ov[1], ci_pres_mean_obe[1], 
    iqr_pres_median_ov[1], iqr_pres_median_obe[1], 
    ci_pres_total_mean[1], iqr_pres_total_median[1]
  ),
  Upper = c(
    ci_pres_mean_ov[2], ci_pres_mean_obe[2], 
    iqr_pres_median_ov[2], iqr_pres_median_obe[2], 
    ci_pres_total_mean[2], iqr_pres_total_median[2]
  ),
  Interval_Type = c("95% CI", "95% CI", "IQR", "IQR", "95% CI", "IQR")
) %>%
  mutate(
    Value = round(Value, 2),
    Lower = pmax(round(Lower, 2), 0),  # Ensure no negative lower bound
    Upper = round(Upper, 2)
  )

# View results
print(df.boot.indirect.pres)

                              Statistic  Value  Lower   Upper Interval_Type
1   Mean Presenteeism Cost (Overweight) 677.49   0.00 1166.90        95% CI
2      Mean Presenteeism Cost (Obesity) 486.74 246.28  701.70        95% CI
3 Median Presenteeism Cost (Overweight) 212.40   0.00  404.54           IQR
4    Median Presenteeism Cost (Obesity) 195.79   0.00  340.67           IQR
5          Total Mean Presenteeism Cost 549.47 281.67  774.67        95% CI
6        Total Median Presenteeism Cost 181.61   0.00  340.66           IQR

14.0.3 Unpaid work

Code

set.seed(123)
fct.boot.unpaid.work <- function(data, i) {
  df <- data[i, ]  # Bootstrap sample

  # Group by BMI category and calculate the mean for 'Costs absenteeism (2025 CHF)'
  means_by_category <- df %>%
    group_by(`BMI category`) %>%
    summarise(mean_cost = mean(`Costs impaired unpaid work (2025 CHF)`, na.rm = TRUE), median_cost = median(`Costs impaired unpaid work (2025 CHF)`, na.rm = TRUE))

  # Calculate the total mean
  total_mean <- mean(df$`Costs impaired unpaid work (2025 CHF)`, na.rm = TRUE)
  
  # Calculate the total median
  total_median <- median(df$`Costs impaired unpaid work (2025 CHF)`, na.rm = TRUE)

  # Return both category means and the total mean
  return(c(means_by_category$mean_cost, means_by_category$median_cost,total_mean, total_median))
}


# Bootstrapping for presenteeism with 5000 iterations (Costs in 2025 CHF)
set.seed(123)
boot.unpaid.work <- boot(df.iPCQ.adapted, fct.boot.unpaid.work, R = 5000)

# Function to calculate CI and ensure lower bound >= 0
ci_calculate_unpaid_w <- function(boot_obj, index) {
  ci <- boot.ci(boot_obj, type = "perc", index = index)$perc[4:5]
  ci[1] <- max(ci[1], 0)  # Cap lower bound at 0
  return(ci)
}

# Calculate the 95% Confidence Intervals (Costs in 2025 CHF)
ci_unpaid_work_mean_ov    <- ci_calculate_unpaid_w(boot.unpaid.work, 1)  # Mean overweight
ci_unpaid_work_mean_obe   <- ci_calculate_unpaid_w(boot.unpaid.work, 2)  # Mean obesity
ci_unpaid_work_total_mean <- ci_calculate_unpaid_w(boot.unpaid.work, 5)  # Total mean

# IQR for median values (unchanged in calculation)
iqr_unpaid_work_median_ov   <- quantile(boot.unpaid.work$t[, 3], probs = c(0.25, 0.75))
iqr_unpaid_work_median_obe  <- quantile(boot.unpaid.work$t[, 4], probs = c(0.25, 0.75))
iqr_unpaid_work_total_median <- quantile(boot.unpaid.work$t[, 6], probs = c(0.25, 0.75))

# Extract original bootstrap estimates
t1_unpaid_work_mean_ov      <- mean(boot.unpaid.work$t[, 1])
t2_unpaid_work_mean_obe     <- mean(boot.unpaid.work$t[, 2])
t3_unpaid_work_median_ov    <- mean(boot.unpaid.work$t[, 3])
t4_unpaid_work_median_obe   <- mean(boot.unpaid.work$t[, 4])
t5_unpaid_work_total_mean   <- mean(boot.unpaid.work$t[, 5])
t6_unpaid_work_total_median <- mean(boot.unpaid.work$t[, 6])

# Create results DataFrame
df.boot.indirect.unpaid.work <- data.frame(
  Statistic = c(
    "Mean Unpaid Work Cost (Overweight)", 
    "Mean Unpaid Work Cost (Obesity)", 
    "Median Unpaid Work Cost (Overweight)", 
    "Median Unpaid Work Cost (Obesity)", 
    "Total Mean Unpaid Work Cost", 
    "Total Median Unpaid Work Cost"
  ),
  Value = c(
    t1_unpaid_work_mean_ov, t2_unpaid_work_mean_obe, 
    t3_unpaid_work_median_ov, t4_unpaid_work_median_obe, 
    t5_unpaid_work_total_mean, t6_unpaid_work_total_median
  ),
  Lower = c(
    ci_unpaid_work_mean_ov[1], ci_unpaid_work_mean_obe[1], 
    iqr_unpaid_work_median_ov[1], iqr_unpaid_work_median_obe[1], 
    ci_unpaid_work_total_mean[1], iqr_unpaid_work_total_median[1]
  ),
  Upper = c(
    ci_unpaid_work_mean_ov[2], ci_unpaid_work_mean_obe[2], 
    iqr_unpaid_work_median_ov[2], iqr_unpaid_work_median_obe[2], 
    ci_unpaid_work_total_mean[2], iqr_unpaid_work_total_median[2]
  ),
  Interval_Type = c("95% CI", "95% CI", "IQR", "IQR", "95% CI", "IQR")
) %>%
  mutate(
    Value = round(Value, 2),
    Lower = pmax(round(Lower, 2), 0),  # Force lower bound >= 0
    Upper = round(Upper, 2)
  )

# View results
print(df.boot.indirect.unpaid.work)

                             Statistic  Value  Lower   Upper Interval_Type
1   Mean Unpaid Work Cost (Overweight) 436.71   2.18 1237.28        95% CI
2      Mean Unpaid Work Cost (Obesity) 894.42 232.81 1872.04        95% CI
3 Median Unpaid Work Cost (Overweight)   0.54   0.00    0.00           IQR
4    Median Unpaid Work Cost (Obesity)   4.53   0.00    0.00           IQR
5          Total Mean Unpaid Work Cost 745.56 241.02 1436.80        95% CI
6        Total Median Unpaid Work Cost   0.09   0.00    0.00           IQR

Code

# Export the data frame to a CSV file
write.csv(df.boot.indirect.pres, "../exported_datasets/tbl.boot.indirect.pres.csv", row.names= TRUE)
write.csv(df.boot.indirect.abs, "../exported_datasets/tbl.boot.indirect.abs.csv", row.names= TRUE)
write.csv(df.boot.indirect.unpaid.work, "../exported_datasets/tbl.boot.indirect.unpaid.work.csv", row.names= TRUE)

Code

set.seed(123)
# Erstellen eines Vektors für die total costs in jeder BMI-Kategorie
total.costs.boot.overweight <- vector("numeric", 5000)
total.costs.boot.obesity <- vector("numeric", 5000)
total.costs.boot.societal.overall <- vector("numeric", 5000)

# Bootstrapping für jede BMI-Kategorie
set.seed(123)
for(i in 1:5000){
  
  # Bootstrapping für Overweight
  dir.med.overweight <- sample(filter(df.dir.med.costs, `BMI category` == "Overweight")$`Direct medical costs (2025 CHF)`, 
                               nrow(filter(df.dir.med.costs, `BMI category` == "Overweight")), replace = TRUE)
  dir.nonmed.overweight <- sample(filter(df.dir.non.med.costs, `BMI category` == "Overweight")$`Direct non-medical costs (2025 CHF)`, 
                                  nrow(filter(df.dir.non.med.costs, `BMI category` == "Overweight")), replace = TRUE)
  abs.overweight <- sample(filter(df.iPCQ.adapted, `BMI category` == "Overweight")$`Costs absenteeism (2025 CHF)`, 
                           nrow(filter(df.iPCQ.adapted, `BMI category` == "Overweight")), replace = TRUE)
  pres.overweight <- sample(filter(df.iPCQ.adapted, `BMI category` == "Overweight")$`Costs presenteeism (2025 CHF)`, 
                            nrow(filter(df.iPCQ.adapted, `BMI category` == "Overweight")), replace = TRUE)
  unpaid.work.overweight <- sample(filter(df.iPCQ.adapted, `BMI category` == "Overweight")$`Costs impaired unpaid work (2025 CHF)`, 
                            nrow(filter(df.iPCQ.adapted, `BMI category` == "Overweight")), replace = TRUE)
  
  total.costs.boot.overweight[i] <- mean(dir.med.overweight + dir.nonmed.overweight + abs.overweight + pres.overweight + unpaid.work.overweight)  
  
  # Bootstrapping für Obesity
  dir.med.obesity <- sample(filter(df.dir.med.costs, `BMI category` == "Obesity")$`Direct medical costs (2025 CHF)`, 
                            nrow(filter(df.dir.med.costs, `BMI category` == "Obesity")), replace = TRUE)
  dir.nonmed.obesity <- sample(filter(df.dir.non.med.costs, `BMI category` == "Obesity")$`Direct non-medical costs (2025 CHF)`, 
                               nrow(filter(df.dir.non.med.costs, `BMI category` == "Obesity")), replace = TRUE)
  abs.obesity <- sample(filter(df.iPCQ.adapted, `BMI category` == "Obesity")$`Costs absenteeism (2025 CHF)`, 
                        nrow(filter(df.iPCQ.adapted, `BMI category` == "Obesity")), replace = TRUE)
  pres.obesity <- sample(filter(df.iPCQ.adapted, `BMI category` == "Obesity")$`Costs presenteeism (2025 CHF)`, 
                         nrow(filter(df.iPCQ.adapted, `BMI category` == "Obesity")), replace = TRUE)
  unpaid.work.obesity <- sample(filter(df.iPCQ.adapted, `BMI category` == "Obesity")$`Costs impaired unpaid work (2025 CHF)`, 
                            nrow(filter(df.iPCQ.adapted, `BMI category` == "Obesity")), replace = TRUE)
  
  total.costs.boot.obesity[i] <- mean(dir.med.obesity + dir.nonmed.obesity + abs.obesity + pres.obesity + unpaid.work.obesity)  
  
  # Bootstrapping für Overall (für alle Kategorien)
  dir.med.overall <- sample(df.dir.med.costs$`Direct medical costs (2025 CHF)`, 
                            nrow(df.dir.med.costs), replace = TRUE)
  dir.nonmed.overall <- sample(df.dir.non.med.costs$`Direct non-medical costs (2025 CHF)`, 
                               nrow(df.dir.non.med.costs), replace = TRUE)
  abs.overall <- sample(df.iPCQ.adapted$`Costs absenteeism (2025 CHF)`, 
                        nrow(df.iPCQ.adapted), replace = TRUE)
  pres.overall <- sample(df.iPCQ.adapted$`Costs presenteeism (2025 CHF)`, 
                         nrow(df.iPCQ.adapted), replace = TRUE)
  unpaid.work.overall <- sample(df.iPCQ.adapted$`Costs impaired unpaid work (2025 CHF)`,
                                nrow(df.iPCQ.adapted), replace = TRUE)
  
  total.costs.boot.societal.overall[i] <- mean(dir.med.overall + dir.nonmed.overall + abs.overall + pres.overall + unpaid.work.overall)  
}

# Median and IRQ
med.total.costs.overweight <- median(total.costs.boot.overweight)
iqr.total.costs.overweight <- quantile(total.costs.boot.overweight, probs = c(0.25, 0.75))

med.total.costs.obesity <- median(total.costs.boot.obesity)
iqr.total.costs.obesity <- quantile(total.costs.boot.obesity, probs = c(0.25, 0.75))

med.total.costs.societal.overall <- median(total.costs.boot.societal.overall)
iqr.total.costs.societal.overall <- quantile(total.costs.boot.societal.overall, probs = c(0.25, 0.75))

# Berechnung der Konfidenzintervalle für jede BMI-Kategorie
# Define the basic bootstrap CI function
ci_formula_total <- function(orig_mean, boot_samples) {
  lower <- 2 * orig_mean - quantile(boot_samples, probs = 0.975, na.rm = TRUE)
  upper <- 2 * orig_mean - quantile(boot_samples, probs = 0.025, na.rm = TRUE)
  return(c(lower = lower, upper = upper))
}

# Compute original means
mean.overweight <- mean(total.costs.boot.overweight)
mean.obesity <- mean(total.costs.boot.obesity)
mean.societal.overall <- mean(total.costs.boot.societal.overall)

# Compute basic bootstrap CIs
ci.total.costs.overweight <- ci_formula_total(mean.overweight, total.costs.boot.overweight)
ci.total.costs.obesity <- ci_formula_total(mean.obesity, total.costs.boot.obesity)
ci.total.costs.societal.overall <- ci_formula_total(mean.societal.overall, total.costs.boot.societal.overall)

# Print results
# ci.total.costs.overweight
# ci.total.costs.obesity
# ci.total.costs.societal.overall


# Erstellen des DataFrames für die total costs in jeder BMI-Kategorie
df.boot.total <- rbind(
  data.frame(
    Statistic = "Total Societal Costs (Overweight)",
    Value = round(mean(total.costs.boot.overweight), 2),
    Lower = round(ci.total.costs.overweight[1], 2),
    Upper = round(ci.total.costs.overweight[2], 2)
  ),
  data.frame(
    Statistic = "Total Societal Costs (Obesity)",
    Value = round(mean(total.costs.boot.obesity), 2),
    Lower = round(ci.total.costs.obesity[1], 2),
    Upper = round(ci.total.costs.obesity[2], 2)
  ),
  data.frame(
    Statistic = "Total Societal Costs",
    Value = round(mean(total.costs.boot.societal.overall), 2),
    Lower = round(ci.total.costs.societal.overall[1], 2),
    Upper = round(ci.total.costs.societal.overall[2], 2)
  )
)

# Mittelwert + 95%-CI
df.boot.total.mean <- rbind(
  data.frame(
    Statistic = "Total Mean Societal Costs (Overweight)",
    Value = round(mean(total.costs.boot.overweight), 2),
    Lower = round(ci.total.costs.overweight[1], 2),
    Upper = round(ci.total.costs.overweight[2], 2),
    Interval_Type = "95% CI"
  ),
  data.frame(
    Statistic = "Total Mean Societal Costs (Obesity)",
    Value = round(mean(total.costs.boot.obesity), 2),
    Lower = round(ci.total.costs.obesity[1], 2),
    Upper = round(ci.total.costs.obesity[2], 2),
    Interval_Type = "95% CI"
  ),
  data.frame(
    Statistic = "Total Mean Societal Costs",
    Value = round(mean(total.costs.boot.societal.overall), 2),
    Lower = round(ci.total.costs.societal.overall[1], 2),
    Upper = round(ci.total.costs.societal.overall[2], 2),
    Interval_Type = "95% CI"
  )
)

# Median + IQR
df.boot.total.median <- rbind(
  data.frame(
    Statistic = "Total Median Societal Costs (Overweight)",
    Value = round(med.total.costs.overweight, 2),
    Lower = round(iqr.total.costs.overweight[1], 2),
    Upper = round(iqr.total.costs.overweight[2], 2),
    Interval_Type = "IQR"
  ),
  data.frame(
    Statistic = "Total Median Societal Costs (Obesity)",
    Value = round(med.total.costs.obesity, 2),
    Lower = round(iqr.total.costs.obesity[1], 2),
    Upper = round(iqr.total.costs.obesity[2], 2),
    Interval_Type = "IQR"
  ),
  data.frame(
    Statistic = "Total Median Societal Costs",
    Value = round(med.total.costs.societal.overall, 2),
    Lower = round(iqr.total.costs.societal.overall[1], 2),
    Upper = round(iqr.total.costs.societal.overall[2], 2),
    Interval_Type = "IQR"
  )
)

# Zusammenführen
df.boot.total.full <- rbind(df.boot.total.mean, df.boot.total.median)

# Ausgabe des DataFrames für alle total costs
print(df.boot.total.full)

                                            Statistic   Value   Lower   Upper
lower.97.5%    Total Mean Societal Costs (Overweight) 3194.86  399.25 5049.41
lower.97.5%1      Total Mean Societal Costs (Obesity) 2691.96 1501.60 3646.68
lower.97.5%2                Total Mean Societal Costs 2850.44 1667.17 3829.17
25%          Total Median Societal Costs (Overweight) 3043.84 2245.16 3974.46
25%1            Total Median Societal Costs (Obesity) 2648.21 2303.68 3035.18
25%2                      Total Median Societal Costs 2810.81 2456.60 3204.59
             Interval_Type
lower.97.5%         95% CI
lower.97.5%1        95% CI
lower.97.5%2        95% CI
25%                    IQR
25%1                   IQR
25%2                   IQR

15 Merge datasets regarding costs

Code

# Overall dataframe for all bootstrapped costs
df.boot.overall.raw <- bind_rows(
  df.boot.direct.medical.costs, 
  df.boot.direct.non.medical.costs, 
  df.boot.indirect.abs, 
  df.boot.indirect.pres,
  df.boot.indirect.unpaid.work,
  df.boot.total.full
)

# Create a new column 'BMI category' based on the 'Statistic' column
df.boot.overall.adapted <- df.boot.overall.raw %>%
  mutate(`BMI category` = case_when(
    grepl("\\(Overweight\\)", Statistic) ~ "Overweight",
    grepl("\\(Obesity\\)", Statistic) ~ "Obesity",
    grepl("\\Total ", Statistic) ~ "Overall"),
    Statistic = gsub("^Total | ^Total| \\(Overweight\\)| \\(Obesity\\)| \\Total", "", Statistic)) %>% 
  mutate(Mean = paste0(round(Value, 2), " [", round(Lower, 2), "; ", round(Upper, 2), "]"),
         Statistic = gsub("AbsenteeismCost", "Absenteeism Cost", Statistic)) %>%
  select(`BMI category`, Statistic, Mean) 
df.boot.overall.adapted <- df.boot.overall.adapted %>% 
  bind_rows(
    df.boot.overall.adapted %>% 
      filter(Statistic == "Mean Direct medical Cost") %>%
      mutate(Statistic = "Mean Healthcare Costs"),
    df.boot.overall.adapted %>% 
      filter(Statistic == "Median Direct medical Cost") %>%
      mutate(Statistic = "Median Healthcare Costs")
  )

Code

library(dplyr)
library(tidyr)
library(stringr)

tbl.cost.summary.CHF <- df.boot.overall.adapted %>%
  # Extract Measure (Mean/Median) and Category
  mutate(
    Measure = case_when(
      str_detect(Statistic, "Mean") ~ "Mean [95%CI]",
      str_detect(Statistic, "Median") ~ "Median [IQR]",
      TRUE ~ NA_character_
    ),
    Category = case_when(
  str_detect(Statistic, "Direct medical") ~ "Direct medical",
  str_detect(Statistic, "Direct non-medical") ~ "Direct non-medical",
  str_detect(Statistic, "Absenteeism") ~ "Absenteeism",
  str_detect(Statistic, "Presenteeism") ~ "Presenteeism",
  str_detect(Statistic, "Unpaid Work") ~ "Unpaid Work",
  str_detect(Statistic, "Healthcare") ~ "Healthcare",
  str_detect(Statistic, "Societal|Total Societal|Bootstrapped Costs") ~ "Societal",
  TRUE ~ NA_character_
)
  ) %>%
  # Pivot to wide format for BMI categories
  select(Category, Measure, `BMI category`, Mean) %>%
  pivot_wider(names_from = `BMI category`, values_from = Mean) %>%
  # Sort categories and ensure alternating Mean/Median
  arrange(factor(Category, levels = c(
    "Direct medical", "Direct non-medical", "Absenteeism",
    "Presenteeism", "Unpaid Work", "Healthcare", "Societal")), Measure) %>%
  # Add nested row structure: repeat Category only for Mean row
  mutate(`Cost category (2025 CHF)` = if_else(Measure == "Mean [95%CI]", Category, ""))%>%
  # Reorder and rename columns
  select(`Cost category (2025 CHF)`, Measure, Overweight, Obesity, Overall)

tbl.cost.summary.CHF

# A tibble: 14 × 5
   `Cost category (2025 CHF)` Measure      Overweight            Obesity Overall
   <chr>                      <chr>        <chr>                 <chr>   <chr>  
 1 "Direct medical"           Mean [95%CI] 474.61 [269.93; 667.… 577.02… 543.69…
 2 ""                         Median [IQR] 438.26 [335.38; 540.… 432.69… 442.44…
 3 "Direct non-medical"       Mean [95%CI] 0.85 [0; 1.7]         28.4 [… 19.39 …
 4 ""                         Median [IQR] 0 [0; 0]              0 [0; … 0 [0; …
 5 "Absenteeism"              Mean [95%CI] 1602.22 [0; 3062.5]   705.91… 1000.3…
 6 ""                         Median [IQR] 34.58 [0; 0]          5.54 [… 0.81 […
 7 "Presenteeism"             Mean [95%CI] 677.49 [0; 1166.9]    486.74… 549.47…
 8 ""                         Median [IQR] 212.4 [0; 404.54]     195.79… 181.61…
 9 "Unpaid Work"              Mean [95%CI] 436.71 [2.18; 1237.2… 894.42… 745.56…
10 ""                         Median [IQR] 0.54 [0; 0]           4.53 [… 0.09 […
11 "Healthcare"               Mean [95%CI] 474.61 [269.93; 667.… 577.02… 543.69…
12 ""                         Median [IQR] 438.26 [335.38; 540.… 432.69… 442.44…
13 "Societal"                 Mean [95%CI] 3194.86 [399.25; 504… 2691.9… 2850.4…
14 ""                         Median [IQR] 3043.84 [2245.16; 39… 2648.2… 2810.8…

15.0.1 Export summary table in 2025 CHF

Code

write.csv(tbl.cost.summary.CHF, "../tables/tbl.cost.summary.CHF.csv", row.names = TRUE)

Code

df.boot.overall.raw.EUR <- df.boot.overall.raw %>%
  mutate(
    Value = round(as.numeric(Value) * var.conv.CHF.to.EUR, 2),
    Lower = round(as.numeric(Lower) * var.conv.CHF.to.EUR, 2),
    Upper = round(as.numeric(Upper) * var.conv.CHF.to.EUR, 2)
  )

# Create a new column 'BMI category' based on the 'Statistic' column
df.boot.overall.EUR <- df.boot.overall.raw.EUR %>%
  mutate(`BMI category` = case_when(
    grepl("\\(Overweight\\)", Statistic) ~ "Overweight",
    grepl("\\(Obesity\\)", Statistic) ~ "Obesity",
    grepl("\\Total ", Statistic) ~ "Overall"
  ),
  Statistic = gsub("^Total | ^Total| \\(Overweight\\)| \\(Obesity\\)| \\Total", "", Statistic)
  ) %>% 
  mutate(
    Mean = paste0(round(Value, 2), " [", round(Lower, 2), "; ", round(Upper, 2), "]"),
    Statistic = gsub("AbsenteeismCost", "Absenteeism Cost", Statistic)
  ) %>%
  select(`BMI category`, Statistic, Mean) # Entfernt Value, CI_Lower, CI_Upper

df.boot.overall.EUR <- df.boot.overall.EUR %>%
  # Add one row for Healthcare Costs = Direct medical costs
  bind_rows(
    df.boot.overall.adapted %>% 
      filter(Statistic == "Mean Direct medical Cost") %>%
      mutate(Statistic = "Mean Healthcare Costs"),
    df.boot.overall.adapted %>% 
      filter(Statistic == "Median Direct medical Cost") %>%
      mutate(Statistic = "Median Healthcare Costs")
  )

Code

library(dplyr)
library(tidyr)
library(stringr)

tbl.cost.summary.EUR <- df.boot.overall.EUR %>%
  # Extract Measure (Mean/Median) and Category
  mutate(
    Measure = case_when(
      str_detect(Statistic, "Mean") ~ "Mean [95%CI]",
      str_detect(Statistic, "Median") ~ "Median [IQR]",
      TRUE ~ NA_character_
    ),
    Category = case_when(
  str_detect(Statistic, "Direct medical") ~ "Direct medical",
  str_detect(Statistic, "Direct non-medical") ~ "Direct non-medical",
  str_detect(Statistic, "Absenteeism") ~ "Absenteeism",
  str_detect(Statistic, "Presenteeism") ~ "Presenteeism",
  str_detect(Statistic, "Unpaid Work") ~ "Unpaid Work",
  str_detect(Statistic, "Healthcare") ~ "Healthcare",
  str_detect(Statistic, "Societal|Total Societal|Bootstrapped Costs") ~ "Societal",
  TRUE ~ NA_character_
)
  ) %>%
  # Keep only valid rows
  filter(!is.na(Measure), !is.na(Category)) %>%
  # Pivot to wide format for BMI categories
  select(Category, Measure, `BMI category`, Mean) %>%
  pivot_wider(names_from = `BMI category`, values_from = Mean) %>%
  # Sort categories and ensure alternating Mean/Median
  arrange(factor(Category, levels = c(
    "Direct medical", "Direct non-medical", "Absenteeism",
    "Presenteeism", "Unpaid Work", "Healthcare", "Societal")), Measure) %>%
  # Add nested row structure: repeat Category only for Mean row
  mutate(`Cost category (2025 EUR)` = if_else(Measure == "Mean [95%CI]", Category, "")) %>% 
  # Reorder and rename columns
  select(`Cost category (2025 EUR)`, Measure, Overweight, Obesity, Overall)

tbl.cost.summary.EUR

# A tibble: 14 × 5
   `Cost category (2025 EUR)` Measure      Overweight            Obesity Overall
   <chr>                      <chr>        <chr>                 <chr>   <chr>  
 1 "Direct medical"           Mean [95%CI] 348.31 [198.1; 489.5… 423.47… 399.01…
 2 ""                         Median [IQR] 321.63 [246.13; 396.… 317.55… 324.7 …
 3 "Direct non-medical"       Mean [95%CI] 0.62 [0; 1.25]        20.84 … 14.23 …
 4 ""                         Median [IQR] 0 [0; 0]              0 [0; … 0 [0; …
 5 "Absenteeism"              Mean [95%CI] 1175.85 [0; 2247.53]  518.06… 734.17…
 6 ""                         Median [IQR] 25.38 [0; 0]          4.07 [… 0.59 […
 7 "Presenteeism"             Mean [95%CI] 497.2 [0; 856.37]     357.21… 403.25…
 8 ""                         Median [IQR] 155.88 [0; 296.89]    143.69… 133.28…
 9 "Unpaid Work"              Mean [95%CI] 320.5 [1.6; 908.02]   656.4 … 547.16…
10 ""                         Median [IQR] 0.4 [0; 0]            3.32 [… 0.07 […
11 "Healthcare"               Mean [95%CI] 474.61 [269.93; 667.… 577.02… 543.69…
12 ""                         Median [IQR] 438.26 [335.38; 540.… 432.69… 442.44…
13 "Societal"                 Mean [95%CI] 2344.67 [293; 3705.7] 1975.6… 2091.9…
14 ""                         Median [IQR] 2233.84 [1647.7; 291… 1943.4… 2062.8…

15.0.2 Export summary table in 2025 EUR

Code

write.csv(tbl.cost.summary.EUR, "../tables/tbl.cost.summary.EUR.csv", row.names = TRUE)

16 Tables and figures

Code

# New dataset with mean values of disaggregated bootstrapped cost categories
df.mean.societal.CHF <- bind_rows(
  df.boot.direct.medical.costs %>%
    filter(Statistic == "Total Mean Direct medical Cost") %>%
    select(Statistic, Value) %>%
    mutate(`Cost category` = "Direct medical cost") %>% 
    rename(`Mean` = Value),
  
  df.boot.direct.non.medical.costs %>%
    filter(Statistic == "Total Mean Direct non-medical Cost") %>%
    select(Statistic, Value) %>%
    mutate(`Cost category` = "Direct non-medical cost") %>% 
    rename(`Mean` = Value),
  
  df.boot.indirect.abs %>% 
    filter(Statistic == "Total Mean Absenteeism Cost") %>% 
    select(Statistic, Value) %>% 
    mutate(`Cost category` = "Absenteeism cost") %>% 
    rename(`Mean` = Value),
  
  df.boot.indirect.pres %>% 
    filter(Statistic == "Total Mean Presenteeism Cost") %>% 
    select(Statistic, Value) %>% 
    mutate(`Cost category` = "Presenteeism cost") %>% 
    rename(`Mean` = Value),
  
  df.boot.indirect.unpaid.work %>% 
    filter(Statistic == "Total Mean Unpaid Work Cost") %>% 
    select(Statistic, Value) %>% 
    mutate(`Cost category` = "Unpaid Work cost") %>% 
    rename(`Mean` = Value))


# Percentage of total societal costs
mean.tot.soc <- sum(df.mean.societal.CHF$Mean)

df.mean.societal.CHF <- df.mean.societal.CHF %>% 
  mutate(Percentage = round(Mean / mean.tot.soc * 100, 2)) %>% 
    mutate(Percentage = round(Percentage, 0)) %>% 
  mutate(Mean = round(Mean, 0))

# Develop ggplot black and white
ggplot.tot.soc.CHF.bw <- ggplot(df.mean.societal.CHF, aes(x = "Mean costs", y = `Percentage`, fill = reorder(`Cost category`, `Percentage`))) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  theme_void() +
  labs(fill = "Cost category (2025 CHF)") +
  scale_fill_grey() + 
  theme_bw() +           # Apply a black and white theme
  geom_text(aes(label = round(`Mean`, 2)), position = position_stack(vjust = 0.5), color = "black")

plot(ggplot.tot.soc.CHF.bw)

Code

ggsave("../figures/ggplot.tot.soc.CHF.bw.png", plot = ggplot.tot.soc.CHF.bw, width = 8, height = 6, dpi = 300)

Code

df.boot.overall.adapted.2 <- df.boot.overall.adapted %>%
  mutate(
    # Extract mean (number before the space)
    mean = as.numeric(str_extract(Mean, "^[0-9.]+")),
    # Extract lower bound (number after [ and before ;)
    lower = as.numeric(str_extract(Mean, "(?<=\\[)[0-9.]+")),
    # Extract upper bound (number after ; and before ])
    upper = as.numeric(str_extract(Mean, "(?<=; )[0-9.]+"))
  ) %>%
  filter(startsWith(Statistic, "Mean")) %>% 
    # Rename Statistic
  mutate(
    Statistic = case_when(
      Statistic == "Mean Direct medical Cost" ~ "Direct medical",
      Statistic == "Mean Direct non-medical Cost" ~ "Direct non-medical",
      Statistic == "Mean Absenteeism Cost" ~ "Absenteeism",
      Statistic == "Mean Presenteeism Cost" ~ "Presenteeism",
      Statistic == "Mean Unpaid Work Cost" ~ "Unpaid work",
      Statistic == "Mean Societal Costs" ~ "Societal",
      Statistic == "Mean Healthcare Costs" ~ "Healthcare",
      TRUE ~ Statistic
    )
  ) %>%
  # Remove Healthcare
  filter(Statistic != "Healthcare") %>%
  # Order the factor
 mutate(
    Statistic = factor(Statistic, levels = c(
      "Direct medical", "Direct non-medical", "Absenteeism", 
      "Presenteeism", "Unpaid work", "Societal"
    )),
    `BMI category` = factor(`BMI category`, 
                             levels = c("Overweight", "Obesity", "Overall"))  # <- enforce order
  )

# Develop boxplots with Jitter
fig.box.cost.overview.bw <- ggplot(df.boot.overall.adapted.2, aes(x = Statistic, y = mean, group = `BMI category`)) +
  geom_point(aes(shape = `BMI category`), position = position_dodge(width = 0.4), size = 3) +
  geom_errorbar(aes(ymin = lower, ymax = upper), 
                width = 0.4, 
                position = position_dodge(width = 0.4)) +
  labs(x = "Cost category", 
       y = "Costs (2025 CHF)",
       shape = "Subgroups") +
  scale_y_continuous(breaks = seq(0, 6000, by = 1000)) +
  scale_shape_manual(values = c(16, 17, 18, 15)) +
  theme_bw() +
  theme(legend.position = "right")

plot(fig.box.cost.overview.bw)

Code

ggsave("../figures/fig.box.costs.subgroup.bw.png", plot = fig.box.cost.overview.bw, width = 8, height = 6, dpi = 300)

17 Percentages of cost contribution

Code

df.boot.overall.perc <- df.boot.overall.raw %>%
  # Keep only rows where Statistic starts with "Total Mean"
  filter(str_starts(Statistic, "Total Mean")) %>%
  # Exclude Societal Costs rows
  filter(!str_detect(Statistic, "Societal Costs")) %>%
  # Keep only Statistic and Value columns
  select(Statistic, Value) %>%
  # Calculate share of each cost relative to the sum of all costs
  mutate(perc = round(100 * Value / sum(Value), 2)) %>%
  # Sort by percentage
  arrange(desc(perc))

df.boot.overall.perc

                              Statistic   Value  perc
...1        Total Mean Absenteeism Cost 1000.39 35.00
...2        Total Mean Unpaid Work Cost  745.56 26.08
...3       Total Mean Presenteeism Cost  549.47 19.22
...4     Total Mean Direct medical Cost  543.69 19.02
...5 Total Mean Direct non-medical Cost   19.39  0.68

Code

# Percentage of indirect costs on total societal costs
df.boot.overall.perc %>%
  filter(Statistic %in% c("Total Mean Absenteeism Cost",
                          "Total Mean Unpaid Work Cost",
                          "Total Mean Presenteeism Cost")) %>%
  summarise(total_perc = sum(perc))

  total_perc
1       80.3

18 Rerun calculation for indirect costs via bootstrapping

Excluding participants 10046

Code

df.iPCQ.subset <- df.iPCQ.adapted %>%
  filter(ID != "BO2WL_F_10046")
  
# Bootstrapping with dataset without 10046
set.seed(123)
fct.boot.abs <- function(data, i) {
  df <- data[i, ]  # Bootstrap sample

  # Group by BMI category and calculate the mean for 'Costs absenteeism (2025 CHF)'
  means_by_category <- df %>%
    group_by(`BMI category`) %>%
    summarise(
      mean_cost = mean(`Costs absenteeism (2025 CHF)`, na.rm = TRUE), 
      median_cost = median(`Costs absenteeism (2025 CHF)`, na.rm = TRUE))

  # Calculate the total mean
  total_mean <- mean(df$`Costs absenteeism (2025 CHF)`, na.rm = TRUE)
  
  # Calculate the total median
  total_median <- median(df$`Costs absenteeism (2025 CHF)`, na.rm = TRUE)

  # Return both category means and the total mean
  return(c(means_by_category$mean_cost, means_by_category$median_cost,total_mean, total_median))
}

# Bootstrapping for presenteeism with 5000 iterations (Costs in 2025 CHF)
library(boot)
set.seed(123)
boot.abs.sub <- boot(df.iPCQ.subset, fct.boot.abs, R = 5000)

# Calculate the 95% Confidence Intervals (CI) (Costs in 2025 CHF)
set.seed(123)
ci_abs_mean_ov_sub <- boot.ci(boot.abs.sub, type = "perc", index = 1)$perc [4:5]  # CI for t1 (mean overweight)
ci_abs_mean_obe_sub <- boot.ci(boot.abs.sub, type = "perc", index = 2)$perc[4:5]  # CI for t2 (mean obesity)
ci_abs_median_ov_sub <- boot.ci(boot.abs.sub, type = "perc", index = 3)$perc[4:5]  # CI for t3 (median overweight)
ci_abs_median_obe_sub <- boot.ci(boot.abs.sub, type = "perc", index = 4)$perc[4:5]  # CI for t4 (median obesity)
ci_abs_total_mean_sub <- boot.ci(boot.abs.sub, type = "perc", index = 5)$perc[4:5]  # CI for t5 (total mean)
ci_abs_total_median_sub <- boot.ci(boot.abs.sub, type = "perc", index = 6)$perc[4:5]  # CI for t6 (total median)

# IRQ for median
iqr_abs_median_ov_sub <- quantile(boot.abs.sub$t[, 3], probs = c(0.25, 0.75))  # Median Overweight
iqr_abs_median_obe_sub <- quantile(boot.abs.sub$t[, 4], probs = c(0.25, 0.75))  # Median Obesity
iqr_abs_total_median_sub <- quantile(boot.abs.sub$t[, 6], probs = c(0.25, 0.75))  # Total Median

# Extract original bootstrap estimates (Costs in 2025 CHF)
t1_abs_mean_ov_sub <- mean(boot.abs.sub$t[, 1])  # t1: Mean cost for overweight
t2_abs_mean_obe_sub <- mean(boot.abs.sub$t[, 2])  # t2: Mean cost for obesity
t3_abs_median_ov_sub <- mean(boot.abs.sub$t[, 3])  # t3: Median cost for overweight
t4_abs_median_obe_sub <- mean(boot.abs.sub$t[, 4])  # t4: Median cost for obesity
t5_abs_total_mean_sub <- mean(boot.abs.sub$t[, 5])  # t5: Total mean cost
t6_abs_total_median_sub <- mean(boot.abs.sub$t[, 6])  # t6: Total median cost

# Create the results DataFrame (Costs in 2025 CHF)
df.boot.indirect.abs.sub <- data.frame(
  Statistic = c("Mean Absenteeism Cost (Overweight)", "Mean Absenteeism Cost (Obesity)", 
                "Median Absenteeism Cost (Overweight)", "Median Absenteeism Cost (Obesity)", 
                "Total Mean Absenteeism Cost", "Total Median Absenteeism Cost"),
  Value = c(t1_abs_mean_ov_sub, t2_abs_mean_obe_sub, t3_abs_median_ov_sub, t4_abs_median_obe_sub, 
            t5_abs_total_mean_sub, t6_abs_total_median_sub),
  Lower = c(ci_abs_mean_ov_sub[1], ci_abs_mean_obe_sub[1], ci_abs_median_ov_sub[1], 
               ci_abs_median_obe_sub[1], ci_abs_total_mean_sub[1], ci_abs_total_median_sub[1]),
  Upper = c(ci_abs_mean_ov_sub[2], ci_abs_mean_obe_sub[2], ci_abs_median_ov_sub[2], 
               ci_abs_median_obe_sub[2], ci_abs_total_mean_sub[2], ci_abs_total_median_sub[2]))

# Optionally round numeric columns
df.boot.indirect.abs.sub <- df.boot.indirect.abs.sub %>%
  mutate(across(where(is.numeric), ~round(.x, 2)))

# View the results
print(df.boot.indirect.abs.sub)

                             Statistic  Value  Lower   Upper
1   Mean Absenteeism Cost (Overweight) 454.66  35.49  922.64
2      Mean Absenteeism Cost (Obesity) 700.41 206.46 1437.19
3 Median Absenteeism Cost (Overweight)  12.39   0.00    0.00
4    Median Absenteeism Cost (Obesity)   6.47   0.00    0.00
5          Total Mean Absenteeism Cost 623.62 250.49 1135.56
6        Total Median Absenteeism Cost   0.34   0.00    0.00

19 Descriptive reporting of HC consumption

Code

# 1. Filter for people who consulted a healthcare professional
df.hc.users <- df.iMCQ.adapted.2 %>%
  filter(`HC professional` == "Yes")

with(df.hc.users, {
  label(`Year T0`) <- "Year of T0 measurement"
  label(`HC professional`) <- "Healthcare professional"
  label(`GP or Medical assistant`) <- "GP or Medical assistant"
  label(GP) <- "General Practitioner"
  label(`Medical assistant`) <- "Medical assistant"
  label(`Social worker`) <- "Social worker"
  label(`Physical therapist`) <- "Physical therapist"
  label(`Occupational therapist`) <- "Occupational therapist"
  label(Logopaedist) <- "Logopaedist"
  label(Nutritionist) <- "Nutritionist"
  label(`Complementary medicine`) <- "Complementary medicine"
  label(`Psychological care`) <- "Psychological care"
  label(`Company doctor`) <- "Company doctor"
  label(Care) <- "Care received"
  label(Medication) <- "Medication use (yes/no)"
  label(`Med.count`) <- "Number of medications used"
  label(Hospital) <- "Hospital visits (yes/no)"
  label(`Emergency department`) <- "Emergency department visits"
  label(Ambulance) <- "Ambulance use (yes/no)"
  label(`Outpatient clinic`) <- "Outpatient clinic visits"
  label(`Day clinic`) <- "Day clinic visits"
  label(`Day care`) <- "Day care received"
  label(`Hospital stays`) <- "Hospital stays (yes/no)"
  label(`Other inpatient stays`) <- "Other inpatient stays (yes/no)"
})

# Create summary table stratified by BMI category
tbl.hc.users <- table1(~  GP + 
                              `Medical assistant` + 
                              `Social worker` + 
                              `Physical therapist` + 
                              `Occupational therapist` + 
                              Logopaedist + 
                              Nutritionist + 
                              `Complementary medicine` + 
                              `Psychological care` + 
                              `Company doctor` + 
                              Care + 
                              Medication + 
                              `Med.count` + 
                              Hospital + 
                              `Emergency department` + 
                              Ambulance + 
                              `Outpatient clinic` + 
                              `Day clinic` + 
                              `Day care` + 
                              `Hospital stays` + 
                              `Other inpatient stays`|`BMI category`, 
                            data = df.hc.users, 
                            overall = "Overall", 
                            droplevels = FALSE)

tbl.hc.users

	Overweight (N=10)	Obesity (N=24)	Overall (N=34)
GP
Mean (SD)	1.60 (1.07)	1.58 (0.929)	1.59 (0.957)
Median [Min, Max]	2.00 [0, 3.00]	1.50 [0, 4.00]	2.00 [0, 4.00]
Medical assistant
Mean (SD)	1.40 (3.78)	1.29 (1.76)	1.32 (2.46)
Median [Min, Max]	0 [0, 12.0]	1.00 [0, 8.00]	0 [0, 12.0]
Social worker
Mean (SD)	0 (0)	0.0833 (0.408)	0.0588 (0.343)
Median [Min, Max]	0 [0, 0]	0 [0, 2.00]	0 [0, 2.00]
Physical therapist
Mean (SD)	3.50 (4.50)	1.71 (2.48)	2.24 (3.24)
Median [Min, Max]	1.50 [0, 12.0]	0 [0, 9.00]	0 [0, 12.0]
Occupational therapist
Mean (SD)	0 (0)	0.375 (1.35)	0.265 (1.14)
Median [Min, Max]	0 [0, 0]	0 [0, 6.00]	0 [0, 6.00]
Logopaedist
Mean (SD)	0 (0)	0 (0)	0 (0)
Median [Min, Max]	0 [0, 0]	0 [0, 0]	0 [0, 0]
Nutritionist
Mean (SD)	0 (0)	0.167 (0.482)	0.118 (0.409)
Median [Min, Max]	0 [0, 0]	0 [0, 2.00]	0 [0, 2.00]
Complementary medicine
Mean (SD)	0.200 (0.632)	1.58 (3.71)	1.18 (3.18)
Median [Min, Max]	0 [0, 2.00]	0 [0, 15.0]	0 [0, 15.0]
Psychological care
Mean (SD)	0.200 (0.632)	0.0833 (0.408)	0.118 (0.478)
Median [Min, Max]	0 [0, 2.00]	0 [0, 2.00]	0 [0, 2.00]
Company doctor
Mean (SD)	0.100 (0.316)	0.0833 (0.408)	0.0882 (0.379)
Median [Min, Max]	0 [0, 1.00]	0 [0, 2.00]	0 [0, 2.00]
Care
No	9 (90.0%)	24 (100%)	33 (97.1%)
Yes	1 (10.0%)	0 (0%)	1 (2.9%)
Medication
No	2 (20.0%)	2 (8.3%)	4 (11.8%)
Yes	8 (80.0%)	22 (91.7%)	30 (88.2%)
Med.count
Mean (SD)	2.38 (1.51)	3.14 (1.88)	2.93 (1.80)
Median [Min, Max]	2.00 [1.00, 5.00]	3.00 [1.00, 8.00]	2.50 [1.00, 8.00]
Missing	2 (20.0%)	2 (8.3%)	4 (11.8%)
Hospital
No	9 (90.0%)	22 (91.7%)	31 (91.2%)
Yes	1 (10.0%)	2 (8.3%)	3 (8.8%)
Emergency department
Mean (SD)	0 (0)	0 (0)	0 (0)
Median [Min, Max]	0 [0, 0]	0 [0, 0]	0 [0, 0]
Ambulance
Mean (SD)	0 (0)	0 (0)	0 (0)
Median [Min, Max]	0 [0, 0]	0 [0, 0]	0 [0, 0]
Outpatient clinic
No	9 (90.0%)	22 (91.7%)	31 (91.2%)
Yes	1 (10.0%)	2 (8.3%)	3 (8.8%)
Day clinic
No	10 (100%)	24 (100%)	34 (100%)
Yes	0 (0%)	0 (0%)	0 (0%)
Day care
No	10 (100%)	24 (100%)	34 (100%)
Yes	0 (0%)	0 (0%)	0 (0%)
Hospital stays
No	10 (100%)	24 (100%)	34 (100%)
Yes	0 (0%)	0 (0%)	0 (0%)
Other inpatient stays
No	10 (100%)	24 (100%)	34 (100%)
Yes	0 (0%)	0 (0%)	0 (0%)

20 Descriptive reporting of abs/pres

Code

df.abs <- df.iPCQ.adapted %>%
  filter(`Absenteeism` == "Yes")

df.pres <- df.iPCQ.adapted %>% 
  filter(`Presenteeism` == "Yes")

df.unpaid.work <- df.iPCQ.adapted %>% 
  filter(`Impaired unpaid work` == "Yes")


# Format labels for table1
with(df.iPCQ.adapted, {
  label(`Working (hours/week)`) <- "Working hours per week"
  label(`Workdays (week)`) <- "Workdays per week"
  label(Absenteeism) <- "Absenteeism (yes/no)"
  label(`Absenteeism (days)`) <- "Absenteeism days in the last 3 months"
  label(Presenteeism) <- "Presenteeism (yes/no)"
  label(`Presenteeism (days)`) <- "Presenteeism days in the last 3 months"
  label(`Presenteeism workload fulfilment`) <- "Presenteeism workload fulfilment"
  label(`Impaired unpaid work`) <- "Impairments during unpaid work"
  label(`Impaired unpaid work (days)`) <- "Days with impaired unpaid work"
  label(`Help during unpaid work (hours)`) <- "Hours of help during unpaid work"
})

# Create summary table stratified by BMI category
tbl.abs <- table1(~ `Working (hours/week)` + 
                              `Workdays (week)` + 
                              Absenteeism + 
                              `Absenteeism (days)` | `BMI category`, 
                            data = df.abs, 
                            overall = "Overall", 
                            droplevels = FALSE)
tbl.abs

	Overweight (N=5)	Obesity (N=11)	Overall (N=16)
Working (hours/week)
Mean (SD)	34.9 (6.05)	34.6 (9.17)	34.7 (8.12)
Median [Min, Max]	36.0 [26.0, 40.5]	38.0 [15.0, 42.0]	37.0 [15.0, 42.0]
Workdays (week)
Mean (SD)	3.80 (1.04)	4.36 (0.897)	4.19 (0.946)
Median [Min, Max]	4.00 [2.50, 5.00]	5.00 [2.50, 5.00]	4.50 [2.50, 5.00]
Absenteeism
No	0 (0%)	0 (0%)	0 (0%)
Yes	5 (100%)	11 (100%)	16 (100%)
No paid work	0 (0%)	0 (0%)	0 (0%)
Absenteeism (days)
Mean (SD)	12.8 (19.2)	5.27 (7.03)	7.63 (12.0)
Median [Min, Max]	5.00 [1.00, 47.0]	3.00 [1.00, 25.0]	3.50 [1.00, 47.0]

Code

tbl.pres <- table1(~ `Working (hours/week)` + 
                              `Workdays (week)` + 
                              `Presenteeism` + 
                              `Presenteeism (days)` + 
                              `Presenteeism workload fulfilment` | `BMI category`, 
                            data = df.pres, 
                            overall = "Overall", 
                            droplevels = FALSE)

tbl.pres

	Overweight (N=12)	Obesity (N=23)	Overall (N=35)
Working (hours/week)
Mean (SD)	35.8 (6.56)	35.1 (9.58)	35.3 (8.57)
Median [Min, Max]	38.0 [25.2, 43.0]	38.0 [5.00, 46.0]	38.0 [5.00, 46.0]
Workdays (week)
Mean (SD)	4.17 (0.913)	4.39 (0.953)	4.31 (0.932)
Median [Min, Max]	4.25 [2.50, 5.00]	5.00 [2.00, 5.00]	5.00 [2.00, 5.00]
Presenteeism
No	0 (0%)	0 (0%)	0 (0%)
Yes	12 (100%)	23 (100%)	35 (100%)
No paid work	0 (0%)	0 (0%)	0 (0%)
Presenteeism (days)
Mean (SD)	11.4 (15.9)	11.8 (11.5)	11.7 (13.0)
Median [Min, Max]	4.00 [2.00, 54.0]	10.0 [1.00, 50.0]	6.00 [1.00, 54.0]
Presenteeism workload fulfilment
Mean (SD)	7.92 (1.83)	7.43 (2.02)	7.60 (1.94)
Median [Min, Max]	8.50 [5.00, 10.0]	8.00 [3.00, 10.0]	8.00 [3.00, 10.0]

Code

tbl.unpaid.work <- table1(~ `Working (hours/week)` + 
                              `Impaired unpaid work` + 
                              `Impaired unpaid work (days)` + 
                              `Help during unpaid work (hours)` | `BMI category`, 
                            data = df.unpaid.work, 
                            overall = "Overall", 
                            droplevels = FALSE)


tbl.unpaid.work

	Overweight (N=4)	Obesity (N=13)	Overall (N=17)
Working (hours/week)
Mean (SD)	34.6 (5.06)	28.2 (13.6)	29.7 (12.3)
Median [Min, Max]	34.0 [29.4, 41.0]	32.5 [0, 42.0]	32.5 [0, 42.0]
Impaired unpaid work
No	0 (0%)	0 (0%)	0 (0%)
Yes	4 (100%)	13 (100%)	17 (100%)
Impaired unpaid work (days)
Mean (SD)	12.3 (11.4)	20.2 (25.3)	18.4 (22.7)
Median [Min, Max]	12.0 [1.00, 24.0]	10.0 [2.00, 90.0]	10.0 [1.00, 90.0]
Help during unpaid work (hours)
Mean (SD)	3.25 (3.86)	8.46 (12.3)	7.24 (11.0)
Median [Min, Max]	1.50 [1.00, 9.00]	5.00 [0, 40.0]	2.00 [0, 40.0]

21 Package Versions

Code

sessionInfo()

R version 4.5.0 (2025-04-11)
Platform: x86_64-apple-darwin20
Running under: macOS Sequoia 15.7.3

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Zurich
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] boot_1.3-31        flextable_0.9.9    corrplot_0.95      ggsci_3.2.0       
 [5] gridExtra_2.3      table1_1.4.3       readxl_1.4.5       eq5d_0.15.7       
 [9] rlang_1.1.6        lifecycle_1.0.4    naniar_1.1.0       DataExplorer_0.8.3
[13] Hmisc_5.2-3        pander_0.6.6       kableExtra_1.4.0   rio_1.2.3         
[17] lubridate_1.9.4    forcats_1.0.0      stringr_1.5.1      dplyr_1.1.4       
[21] purrr_1.0.4        readr_2.1.5        tidyr_1.3.1        tibble_3.2.1      
[25] ggplot2_3.5.2      tidyverse_2.0.0    pacman_0.5.1      

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3      rstudioapi_0.17.1       jsonlite_2.0.0         
  [4] shape_1.4.6.1           magrittr_2.0.3          jomo_2.7-6             
  [7] farver_2.1.2            nloptr_2.2.1            rmarkdown_2.29         
 [10] ragg_1.4.0              vctrs_0.6.5             minqa_1.2.8            
 [13] askpass_1.2.1           base64enc_0.1-3         htmltools_0.5.8.1      
 [16] haven_2.5.5             survey_4.4-2            broom_1.0.8            
 [19] cellranger_1.1.0        Formula_1.2-5           mitml_0.4-5            
 [22] htmlwidgets_1.6.4       uuid_1.2-1              networkD3_0.4.1        
 [25] igraph_2.1.4            iterators_1.0.14        pkgconfig_2.0.3        
 [28] Matrix_1.7-3            R6_2.6.1                fastmap_1.2.0          
 [31] rbibutils_2.3           digest_0.6.37           colorspace_2.1-1       
 [34] textshaping_1.0.1       labeling_0.4.3          timechange_0.3.0       
 [37] compiler_4.5.0          proxy_0.4-27            fontquiver_0.2.1       
 [40] withr_3.0.2             htmlTable_2.4.3         backports_1.5.0        
 [43] DBI_1.2.3               R.utils_2.13.0          pan_1.9                
 [46] MASS_7.3-65             openssl_2.3.3           tools_4.5.0            
 [49] foreign_0.8-90          zip_2.3.3               visdat_0.6.0           
 [52] nnet_7.3-20             R.oo_1.27.1             glue_1.8.0             
 [55] nlme_3.1-168            grid_4.5.0              checkmate_2.3.2        
 [58] cluster_2.1.8.1         generics_0.1.4          gtable_0.3.6           
 [61] labelled_2.14.1         tzdb_0.5.0              R.methodsS3_1.8.2      
 [64] class_7.3-23            data.table_1.17.8       hms_1.1.3              
 [67] xml2_1.3.8              utf8_1.2.5              foreach_1.5.2          
 [70] pillar_1.10.2           mitools_2.4             splines_4.5.0          
 [73] lattice_0.22-7          survival_3.8-3          tidyselect_1.2.1       
 [76] fontLiberation_0.1.0    knitr_1.50              fontBitstreamVera_0.1.1
 [79] reformulas_0.4.1        svglite_2.2.1           xfun_0.52              
 [82] stringi_1.8.7           yaml_2.3.10             evaluate_1.0.3         
 [85] codetools_0.2-20        officer_0.6.10          data.tree_1.1.0        
 [88] gdtools_0.4.2           cli_3.6.5               rpart_4.1.24           
 [91] systemfonts_1.2.3       Rdpack_2.6.4            Rcpp_1.0.14            
 [94] tableone_0.13.2         parallel_4.5.0          lme4_1.1-37            
 [97] glmnet_4.1-9            viridisLite_0.4.2       scales_1.4.0           
[100] e1071_1.7-16            mice_3.18.0