Before analyses

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## âś” forcats   1.0.0     âś” readr     2.1.5
## âś” ggplot2   3.5.2     âś” stringr   1.5.1
## âś” lubridate 1.9.4     âś” tibble    3.2.1
## âś” purrr     1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## âś– dplyr::filter() masks stats::filter()
## âś– dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
## Warning: package 'janitor' was built under R version 4.4.3
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(flextable)
## Warning: package 'flextable' was built under R version 4.4.3
## 
## Attaching package: 'flextable'
## 
## The following object is masked from 'package:purrr':
## 
##     compose
library(rlang)
## Warning: package 'rlang' was built under R version 4.4.3
## 
## Attaching package: 'rlang'
## 
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice
library(stringr)
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.4.3
## 
## Attaching package: 'gtsummary'
## 
## The following object is masked from 'package:flextable':
## 
##     continuous_summary
library(rstatix)
## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:janitor':
## 
##     make_clean_names
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(ggplot2)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## 
## The following objects are masked from 'package:rstatix':
## 
##     desc, mutate
## 
## The following object is masked from 'package:gtsummary':
## 
##     mutate
## 
## The following object is masked from 'package:purrr':
## 
##     compact
## 
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:plyr':
## 
##     mutate
## 
## The following objects are masked from 'package:flextable':
## 
##     border, font, rotate
#data <- read.csv("2025-07-10_GGB_Merged_PERSONALINFO_NewGGBParticipants_WITHGPS.csv")
GGB <- read.csv("C:/Users/thuyb/OneDrive/Desktop/Student Job/Dataset/2025-07-10_GGB_Merged_PERSONALINFO_NewGGBParticipants_WITHGPS.csv")
#labels <- read.csv("/Users/forllychavez/Library/Mobile Documents/com~apple~CloudDocs/Desktop/Escritorio - MacBook Air de Forlly/Master of Epidemiology/2nd Year/Student Job 2.1/Documents to read/Analyses GGB Forlly Romero/Dataset/GGB labels.csv")
GGB <- GGB %>% 
  clean_names()

#Age categories based on blv

GGB<- GGB %>% 
  mutate(
    age_blv = case_when(age %in% c(17:30) ~ '17-30',
                         age %in% c(31:45) ~ '31-45',
                         age %in% c(46:60) ~ '46-60',
                         age %in% c(61:92) ~ '61+'
                         ) %>% factor(levels = c('17-30', '31-45', '46-60','61+' )))

#Gender
GGB <- GGB %>%
  mutate(gender = factor(gender, 
                            levels = c("Man", 
                                       "Vrouw", 
                                       "Niet-binaire geslachtsidentiteit", 
                                       "Ik antwoord liever niet"),
                            ordered = TRUE))

# Income
GGB <- GGB %>%
  mutate(
    income = case_when(
      income %in% c("0 – 1.000 € per maand",
                    "1.001 – 2.000 € per maand",
                    "2.001 – 3.000 € per maand",
                    "3.001 – 4.000 € per maand",
                    "4.001 – 5.000 € per maand",
                    "5.001 – 6.000 € per maand",
                    "meer dan 6.000 € per maand",
                    "Ik antwoord liever niet") ~ income,
      TRUE ~ "Unknown"  # Everything else becomes "Unknown"
    ),
    income = factor(income,
                    levels = c("0 – 1.000 € per maand",
                               "1.001 – 2.000 € per maand",
                               "2.001 – 3.000 € per maand",
                               "3.001 – 4.000 € per maand",
                               "4.001 – 5.000 € per maand",
                               "5.001 – 6.000 € per maand",
                               "meer dan 6.000 € per maand",
                               "Ik antwoord liever niet",
                               "Unknown"),
                    ordered = TRUE)
  )

  
# Income binary 
GGB <- GGB %>%
  mutate(income_binary = case_when(
    income %in% c("0 – 1.000 € per maand",
                  "1.001 – 2.000 € per maand",
                  "2.001 – 3.000 € per maand") ~ "≤3000",
    income %in% c("3.001 – 4.000 € per maand",
                  "4.001 – 5.000 € per maand",
                  "5.001 – 6.000 € per maand",
                  "meer dan 6.000 € per maand") ~ ">3000",
    income == "Ik antwoord liever niet" ~ NA_character_,
    TRUE ~ NA_character_
  ))

# Education
GGB <- GGB %>%
  mutate(education = factor(education, 
                            levels = c("Geen", 
                                       "Lager onderwijs", 
                                       "Lager secundair onderwijs (getuigschrift 4de jaar)", 
                                       "Hoger secundair onderwijs",
                                       "Niet-universitair hoger onderwijs / bachelor",
                                       "Universitair onderwijs / master / doctoraat",
                                       "Ik antwoord liever niet / Geen antwoord"),
                            ordered = TRUE))

# Nationality 
GGB <- GGB %>% 
  mutate(nationality_belgian = as.factor(nationality_belgian)) %>%
  mutate(nationality = case_when(
  nationality_belgian == 1 ~ "Belgian National",
  is.na(nationality_belgian) ~ "Not Belgian National",
  TRUE ~"Not Belgian National"
))

# Noise annoyance
GGB <- GGB %>% 
  mutate(
    noise_annoyance_cat = case_when(
      noise_annoyance == 1  ~ 'Helemaal niet gehinderd',
      noise_annoyance == 2  ~ 'Niet gehinderd',
      noise_annoyance == 3  ~ 'Tamelijk gehinderd',
      noise_annoyance == 4  ~ 'Ernstig gehinderd',
      noise_annoyance == 5  ~ 'Extreem gehinderd'
    ))

# Awareness about own produced noise
GGB <- GGB %>% 
  mutate(
    bewustzijn_eigen_geluid_cat = factor(case_when(
      bewustzijn_eigen_geluid == 1  ~ 'Always',
      bewustzijn_eigen_geluid == 2  ~ 'Often',
      bewustzijn_eigen_geluid == 3  ~ 'Sometimes',
      bewustzijn_eigen_geluid == 4  ~ 'Rarely',
      bewustzijn_eigen_geluid == 5  ~ 'Never'),
      levels = c('Always', 'Often', 'Sometimes', 'Rarely', 'Never')))

# QUality of life 
GGB <- GGB %>% 
  mutate(
    quality_life = case_when( 
      gen_satisfaction == 1 ~ 'Zeer tevreden',
      gen_satisfaction == 2 ~ 'Tevreden',
      gen_satisfaction == 3 ~ 'Noch tevreden, noch ontevreden',
      gen_satisfaction == 4 ~ 'Niet tevreden',
      gen_satisfaction == 5 ~ 'Helemaal niet tevreden'
  ))

# Bedroom floor
GGB <- GGB %>%
  mutate(
    floor_bedroom = case_when(
      floor_bedroom %in% c("Ondergronds",
                    "Gelijkvloers",
                    "1ste verdieping",
                    "2de verdieping",
                    "3de verdieping",
                    "4de verdieping",
                    "5de verdieping",
                    "6de verdieping",
                    "7de verdieping",
                    "8de verdieping",
                    "Hoger dan 8ste verdieping") ~ floor_bedroom,
      TRUE ~ "Unknown"  # Everything else becomes "Unknown"
    ),
    floor_bedroom = factor(floor_bedroom,
                    levels = c("Ondergronds",
                    "Gelijkvloers",
                    "1ste verdieping",
                    "2de verdieping",
                    "3de verdieping",
                    "4de verdieping",
                    "5de verdieping",
                    "6de verdieping",
                    "7de verdieping",
                    "8de verdieping",
                    "Hoger dan 8ste verdieping",
                    "Unknown"),
                    ordered = TRUE)
  )

# Window street 
GGB <- GGB %>% 
  mutate(
    window_street = factor(case_when(
      window_street == 1 ~ "Yes",
      window_street == 0 ~ "No",
      TRUE ~ "Unknown"),
      levels = c("Yes", "No", "Unknown")))

# Bedroom windows at night
GGB <- GGB %>% 
  mutate(
    window_night_num = factor(case_when(
      window_night_num == 5 ~ "Always closed",
      window_night_num == 6 ~ "Mostly closed",
      window_night_num == 7 ~ "Mostly open",
      window_night_num == 8 ~ "Always open"),
      levels = c("Always closed", "Mostly closed", "Mostly open", "Always open")))

# Environmental noise affect your choice to close windows or window grilles at night
GGB <- GGB %>% 
  mutate(
    q8_7 = factor(case_when(
      q8_7 == 5 ~ "Yes, this is the main reason",
      q8_7 == 6 ~ "Yes, but due to other reasons",
      q8_7 == 7 ~ "No"),
      levels = c("Yes, this is the main reason", "Yes, but due to other reasons", "No")))

# General physical health status
GGB <- GGB %>% 
  mutate(
    q4_2 = factor(case_when(
      q4_2 == 1 ~ "Heel goed",
      q4_2 == 2 ~ "Eerder goed",
      q4_2 == 3 ~ "Gemiddeld",
      q4_2 == 4 ~ "Eerder slecht",
      q4_2 == 5 ~ "Heel slecht"),
      levels = c("Heel goed", "Eerder goed", "Gemiddeld", "Eerder slecht", "Heel slecht")))

# Chronic disorders
GGB <- GGB %>% 
  mutate(q4_4 = case_when(
    q4_4 == 1 ~ "Yes", 
    q4_4 == 2 ~ "No"
  ))

# Allergic disorders
GGB <- GGB %>% 
  mutate(q4_6 = case_when(
    q4_6 == 1 ~ "Yes", 
    q4_6 == 2 ~ "No"
  ))

GGB <- GGB %>% 
  mutate(q4_7 = case_when(
    q4_7 == 1 ~ "Allergische astma", 
    q4_7 == 2 ~ "Inhalatieallergie 
    (hooikoorts/pollen, huisstofmijt, dieren)",
    q4_7 == 3 ~ "Voedselallergie",
    q4_7 == 4 ~ "Eczeem, atopische dermatitis",
    q4_7 == 5 ~ "Andere"
  ))

# Driving frequency 
GGB <-GGB %>%
    mutate(q4_16 = case_when(
    q4_16 == 1 ~ "Nooit (al dan niet in het bezit van een rijbewijs)",
    q4_16 == 2 ~ "Minder dan jaarlijks",
    q4_16 == 3 ~ "Een of meerdere keren per jaar, maar niet maandelijks",
    q4_16 == 4 ~ "Een of meerdere keren per maand, maar niet wekelijks",
    q4_16 == 5 ~ "Een of meerdere keren per week, maar niet dagelijks",
    q4_16 == 6 ~ "Dagelijks"),
    q4_16 = factor(q4_16,
                     level = c("Nooit (al dan niet in het bezit van een rijbewijs)", "Minder dan jaarlijks", "Een of meerdere keren per jaar, maar niet maandelijks", "Een of meerdere keren per maand, maar niet wekelijks", "Een of meerdere keren per week, maar niet dagelijks", "Dagelijks" )))

# Participants' hearing status
GGB <-GGB %>% # right ear
    mutate(q7_2_1 = factor(case_when(
    q7_2_1 == 1 ~ "Normaal gehoor",
    q7_2_1 == 2 ~ "Verminderd of geen gehoor, maar zonder gebruik van hoortoestel/implantaat",
    q7_2_1 == 3 ~ "Met hoortoestel/ gehoorapparaat",
    q7_2_1 == 4 ~ "Met implantaat (vb. cochleair implantaat, middenoorimplantaat)"),
    level = c("Normaal gehoor", "Verminderd of geen gehoor, maar zonder gebruik van hoortoestel/implantaat", "Met hoortoestel/ gehoorapparaat", "Met implantaat (vb. cochleair implantaat, middenoorimplantaat)" )))

GGB <-GGB %>% # left ear
    mutate(q7_2_2 = factor(case_when(
    q7_2_2 == 1 ~ "Normaal gehoor",
    q7_2_2 == 2 ~ "Verminderd of geen gehoor, maar zonder gebruik van hoortoestel/implantaat",
    q7_2_2 == 3 ~ "Met hoortoestel/ gehoorapparaat",
    q7_2_2 == 4 ~ "Met implantaat (vb. cochleair implantaat, middenoorimplantaat)"),
    level = c("Normaal gehoor", "Verminderd of geen gehoor, maar zonder gebruik van hoortoestel/implantaat", "Met hoortoestel/ gehoorapparaat", "Met implantaat (vb. cochleair implantaat, middenoorimplantaat)" )))

# Hearing problem
GGB <-GGB %>% 
    mutate(q7_3 = factor(case_when(
    q7_3 == 1 ~ "Ik heb nooit gehoorproblemen",
    q7_3 == 2 ~ "Ik heb een (licht) gehoorprobleem of twijfel of ik er een heb"),
    level = c("Ik heb nooit gehoorproblemen", "Ik heb een (licht) gehoorprobleem of twijfel of ik er een heb")))

# Tinnitus
GGB <-GGB %>%
    mutate(q7_5 = factor(case_when(
    q7_5 == 1 ~ "Ja",
    q7_5 == 2 ~ "Ik weet het niet",
    q7_5 == 3 ~ "Nee"),
    level = c("Ja", "Ik weet het niet", "Nee")))

I. Sociodemographics

Number of participant

GGB %>%
  mutate(stad_grouped= ifelse(stad %in% c("Antwerpen", "Gent", "Leuven"), stad, "Other")) %>%
  tabyl(stad_grouped) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

stad_grouped

n

percent

Antwerpen

1,759

0.17350562

Gent

1,317

0.12990728

Leuven

560

0.05523772

Other

6,502

0.64134938

Total

10,138

1.00000000

Participants per age and gender

 GGB %>%
  tabyl(age_blv, gender) %>%                       
  adorn_totals(where = c("col", "row"))%>% 
  flextable() %>%
  add_header_row(values = c("Leeftijdsklasse", "Aantal steekproefpersonen"), 
                 colwidths = c(1, 5)) %>%         
  theme_vanilla() %>% 
  align(align = 'center', part = 'all')

Leeftijdsklasse

Aantal steekproefpersonen

age_blv

Man

Vrouw

Niet-binaire geslachtsidentiteit

Ik antwoord liever niet

Total

17-30

248

506

8

12

774

31-45

1,095

1,733

12

23

2,863

46-60

1,358

1,884

8

20

3,270

61+

1,774

1,438

1

18

3,231

Total

4,475

5,561

29

73

10,138

Family composition

tibble(
  Statistic = c("Mean", "Median", "Min", "Max"),
  Cohabitants_Adults = c(
    round(mean(GGB$cohabitants_adults, na.rm = TRUE), 2),
    round(median(GGB$cohabitants_adults, na.rm = TRUE), 2),
    round(min(GGB$cohabitants_adults, na.rm = TRUE), 2),
    round(max(GGB$cohabitants_adults, na.rm = TRUE), 0)),
  Cohabitants_Children = c(
    round(mean(GGB$cohabitants_children, na.rm = TRUE), 2),
    round(median(GGB$cohabitants_children, na.rm = TRUE), 2),
    round(min(GGB$cohabitants_children, na.rm = TRUE), 2),
    round(max(GGB$cohabitants_children, na.rm = TRUE), 0)),
   `P-value (t-test)` = c(format(t.test(GGB$cohabitants_adults, GGB$cohabitants_children)$p.value, digits = 3), rep("", 3))  # Replicate p-value for each row
) %>% 
  flextable() %>%
  theme_vanilla() %>%
  align(align = 'center', part = 'all')

Statistic

Cohabitants_Adults

Cohabitants_Children

P-value (t-test)

Mean

2.08

0.54

0

Median

2.00

0.00

Min

0.00

0.00

Max

250.00

10.00

# Create a box plot for comparison -> doesn't work well
boxplot(GGB$cohabitants_adults, GGB$cohabitants_children, 
        names = c("cohabitants_adults", "cohabitants_children"),
        main = "Boxplot Comparison of cohabitants_adults and cohabitants_children",
        ylab = "Values",
        col = c("#FCB8B6", "#F72C26"),
        outline = FALSE)

Nationality

GGB %>%
  tabyl(nationality) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

nationality

n

percent

Belgian National

9,764

0.96310909

Not Belgian National

374

0.03689091

Total

10,138

1.00000000

Educational level

GGB %>%
  tabyl(education) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

education

n

percent

valid_percent

Geen

18

0.001775498

0.001794080

Lager onderwijs

54

0.005326494

0.005382239

Lager secundair onderwijs (getuigschrift 4de jaar)

283

0.027914776

0.028206917

Hoger secundair onderwijs

1,862

0.183665417

0.185587561

Niet-universitair hoger onderwijs / bachelor

3,608

0.355888735

0.359613276

Universitair onderwijs / master / doctoraat

4,208

0.415072006

0.419415927

Ik antwoord liever niet / Geen antwoord

0

0.000000000

0.000000000

105

0.010357072

Total

10,138

1.000000000

1.000000000

Work situation

GGB %>%
  tabyl(employment) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

employment

n

percent

Anders

389

0.03837049

Ik ben arbeidsongeschikt (ziekte, invaliditeit)

536

0.05287039

Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen)

2,307

0.22755968

Ik ben huisvrouw of huisman

126

0.01242849

Ik ben scholier of student

203

0.02002367

Ik ben werkzoekend

127

0.01252713

Ik werk als zelfstandige, zelfstandig helper of als ondernemer

872

0.08601302

Ik werk deeltijds in loondienst

1,422

0.14026435

Ik werk voltijds in loondienst

4,156

0.40994279

Total

10,138

1.00000000

Income

GGB %>%
  tabyl(income) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

income

n

percent

0 – 1.000 € per maand

32

0.003156441

1.001 – 2.000 € per maand

783

0.077234168

2.001 – 3.000 € per maand

1,610

0.158808443

3.001 – 4.000 € per maand

1,674

0.165121326

4.001 – 5.000 € per maand

1,867

0.184158611

5.001 – 6.000 € per maand

1,309

0.129118169

meer dan 6.000 € per maand

1,045

0.103077530

Ik antwoord liever niet

1,616

0.159400276

Unknown

202

0.019925035

Total

10,138

1.000000000

Type of housing

GGB %>%
  tabyl(dwelling_type) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

dwelling_type

n

percent

Andere

225

0.02219373

Appartement/studio

2,132

0.21029789

Halfopen bebouwing

1,869

0.18435589

Open bebouwing

2,654

0.26178733

Rijwoning met tuin

2,862

0.28230420

Rijwoning zonder tuin

396

0.03906096

Total

10,138

1.00000000

Member of association

GGB %>%
  mutate(actiecomite_geluid = case_when(
    actiecomite_geluid == 29 ~ "Yes", 
    actiecomite_geluid == 30 ~ "No"
  )) %>% 
  tabyl(actiecomite_geluid) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

actiecomite_geluid

n

percent

No

9,856

0.97218386

Yes

282

0.02781614

Total

10,138

1.00000000

II. Participants’ quality of life and noise perception

Overall satisfaction

GGB %>%
  tabyl(quality_life) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

quality_life

n

percent

Helemaal niet tevreden

477

0.04705070

Niet tevreden

1,647

0.16245808

Noch tevreden, noch ontevreden

2,024

0.19964490

Tevreden

4,993

0.49250345

Zeer tevreden

997

0.09834287

Total

10,138

1.00000000

Annoyance from environmental noise

library(tidyverse)

# Create new data set with noise_annoyance and other variable, then reshape data
data3 <- GGB %>% 
  select(noise_annoyance, 
         employment,
         dwelling_type,
         gender,
         age_blv, 
         income_binary) %>% 
  gather('var', 'value', -noise_annoyance) %>% 
  mutate(noise_annoyance = factor(noise_annoyance, levels = 1:5, 
                                   labels = c('Helemaal niet gehinderd', 'Niet gehinderd', 
                                              'Tamelijk gehinderd', 'Ernstig gehinderd','Extreem gehinderd')))
## Warning: attributes are not identical across measure variables; they will be
## dropped
# vertical data
data_vertical <- data3 %>% filter(var %in% c("gender", "age_blv", "income_binary"))

# Vertical bar plot
ggplot(data_vertical, aes(x = value, fill = noise_annoyance)) +
  geom_bar(position = 'fill') +
  facet_wrap(~ var, scales = 'free_x') +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  ) +
  labs(
    x = "Value",
    y = "Proportion",
    title = "Noise Annoyance - Gender, Age, Income"
  )

# Employment bar plot
data_employment <- data3 %>% filter(var=="employment")
ggplot(data_employment, aes(x = value, fill = noise_annoyance)) +
  geom_bar(position = 'fill') +
  facet_wrap(~ var, scales = 'free_y') +
  coord_flip() +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A")) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 9),
    legend.title = element_blank(),
    legend.position = "bottom"
  ) +
  labs(
    x = "Value",
    y = "Proportion",
    title = "Noise Annoyance by Employment"
  )+
  guides(fill=guide_legend(nrow=3))

# Dwelling Type bar plot
data_dwelling <- data3 %>% filter(var=="dwelling_type")
ggplot(data_dwelling, aes(x = value, fill = noise_annoyance)) +
  geom_bar(position = 'fill') +
  facet_wrap(~ var, scales = 'free_y') +
  coord_flip() +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A")) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 9),
    legend.title = element_blank()
  ) +
  labs(
    x = "Value",
    y = "Proportion",
    title = "Noise Annoyance by Dwelling Type"
  )

Change in noise annoyance

GGB %>%
  select(noise_annoyance_cat,
         age_blv,income, nationality, dwelling_type, employment) %>% 
  tbl_summary(by = noise_annoyance_cat) %>% 
  add_p()
## The following errors were returned during `add_p()`:
## âś– For variable `dwelling_type` (`noise_annoyance_cat`) and "estimate",
##   "p.value", "conf.low", and "conf.high" statistics: FEXACT error 5. The hash
##   table key cannot be computed because the largest key is larger than the
##   largest representable int. The algorithm cannot proceed. Reduce the
##   workspace, consider using 'simulate.p.value=TRUE' or another algorithm.
## âś– For variable `employment` (`noise_annoyance_cat`) and "estimate", "p.value",
##   "conf.low", and "conf.high" statistics: FEXACT error 5. The hash table key
##   cannot be computed because the largest key is larger than the largest
##   representable int. The algorithm cannot proceed. Reduce the workspace,
##   consider using 'simulate.p.value=TRUE' or another algorithm.
## âś– For variable `income` (`noise_annoyance_cat`) and "estimate", "p.value",
##   "conf.low", and "conf.high" statistics: FEXACT error 5. The hash table key
##   cannot be computed because the largest key is larger than the largest
##   representable int. The algorithm cannot proceed. Reduce the workspace,
##   consider using 'simulate.p.value=TRUE' or another algorithm.
Characteristic Ernstig gehinderd
N = 2,810
1
Extreem gehinderd
N = 816
1
Helemaal niet gehinderd
N = 219
1
Niet gehinderd
N = 1,720
1
Tamelijk gehinderd
N = 4,573
1
p-value2
age_blv




<0.001
    17-30 122 (4.3%) 44 (5.4%) 22 (10%) 216 (13%) 370 (8.1%)
    31-45 764 (27%) 234 (29%) 43 (20%) 481 (28%) 1,341 (29%)
    46-60 991 (35%) 316 (39%) 58 (26%) 488 (28%) 1,417 (31%)
    61+ 933 (33%) 222 (27%) 96 (44%) 535 (31%) 1,445 (32%)
income





    0 – 1.000 € per maand 9 (0.3%) 2 (0.2%) 1 (0.5%) 9 (0.5%) 11 (0.2%)
    1.001 – 2.000 € per maand 235 (8.4%) 110 (13%) 17 (7.8%) 99 (5.8%) 322 (7.0%)
    2.001 – 3.000 € per maand 505 (18%) 149 (18%) 38 (17%) 212 (12%) 706 (15%)
    3.001 – 4.000 € per maand 486 (17%) 120 (15%) 32 (15%) 255 (15%) 781 (17%)
    4.001 – 5.000 € per maand 478 (17%) 105 (13%) 34 (16%) 377 (22%) 873 (19%)
    5.001 – 6.000 € per maand 311 (11%) 94 (12%) 26 (12%) 250 (15%) 628 (14%)
    meer dan 6.000 € per maand 242 (8.6%) 61 (7.5%) 33 (15%) 218 (13%) 491 (11%)
    Ik antwoord liever niet 478 (17%) 161 (20%) 36 (16%) 276 (16%) 665 (15%)
    Unknown 66 (2.3%) 14 (1.7%) 2 (0.9%) 24 (1.4%) 96 (2.1%)
nationality




0.4
    Belgian National 2,691 (96%) 785 (96%) 213 (97%) 1,659 (96%) 4,416 (97%)
    Not Belgian National 119 (4.2%) 31 (3.8%) 6 (2.7%) 61 (3.5%) 157 (3.4%)
dwelling_type





    Andere 65 (2.3%) 28 (3.4%) 4 (1.8%) 42 (2.4%) 86 (1.9%)
    Appartement/studio 685 (24%) 220 (27%) 26 (12%) 257 (15%) 944 (21%)
    Halfopen bebouwing 476 (17%) 137 (17%) 35 (16%) 340 (20%) 881 (19%)
    Open bebouwing 641 (23%) 199 (24%) 123 (56%) 573 (33%) 1,118 (24%)
    Rijwoning met tuin 835 (30%) 192 (24%) 29 (13%) 453 (26%) 1,353 (30%)
    Rijwoning zonder tuin 108 (3.8%) 40 (4.9%) 2 (0.9%) 55 (3.2%) 191 (4.2%)
employment





    Anders 118 (4.2%) 45 (5.5%) 7 (3.2%) 47 (2.7%) 172 (3.8%)
    Ik ben arbeidsongeschikt (ziekte, invaliditeit) 181 (6.4%) 97 (12%) 7 (3.2%) 65 (3.8%) 186 (4.1%)
    Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen) 644 (23%) 150 (18%) 70 (32%) 394 (23%) 1,049 (23%)
    Ik ben huisvrouw of huisman 45 (1.6%) 11 (1.3%) 1 (0.5%) 16 (0.9%) 53 (1.2%)
    Ik ben scholier of student 30 (1.1%) 12 (1.5%) 7 (3.2%) 63 (3.7%) 91 (2.0%)
    Ik ben werkzoekend 41 (1.5%) 13 (1.6%) 2 (0.9%) 19 (1.1%) 52 (1.1%)
    Ik werk als zelfstandige, zelfstandig helper of als ondernemer 246 (8.8%) 86 (11%) 23 (11%) 120 (7.0%) 397 (8.7%)
    Ik werk deeltijds in loondienst 405 (14%) 102 (13%) 27 (12%) 250 (15%) 638 (14%)
    Ik werk voltijds in loondienst 1,100 (39%) 300 (37%) 75 (34%) 746 (43%) 1,935 (42%)
1 n (%)
2 Pearson’s Chi-squared test

Taking actions due to noise annoyance

GGB %>%
  tabyl(q2_7_5) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

q2_7_5

n

percent

valid_percent

1

2,630

0.25942

1

7,508

0.74058

Total

10,138

1.00000

1

Nature sounds

nature <- c("nature_birds", "nature_poultry", "nature_crickets", "nature_frogs", "nature_trees", "nature_water", "nature_animals", "nature_other", "nature_none")

# Recode 0 to "No" and 1 to "Yes" for nature variables
GGB[nature] <- lapply(GGB[nature], function(x) {
  factor(case_when(
    x == 1 ~ "Yes",
    x == 0 ~ "No",
    TRUE ~ "Unknown"
  ), levels = c("Unknown", "No", "Yes"))
})


# Prepare data for plotting
nature_data <- GGB[nature] %>%
  mutate_all(as.factor) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
  group_by(Variable, Response) %>%
  dplyr::summarise(n = n(), .groups = "drop") %>%
  group_by(Variable) %>%
  mutate(percent = n / sum(n) * 100)

# Plot 100% stacked bar chart
ggplot(nature_data, aes(x = Variable, y = percent, fill = Response)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(label = ifelse(Response == "Yes", paste0(round(percent, 1), "%"), "")),
            position = position_stack(vjust = 1),
            color = "white", size = 3.5, fontface = "bold") +
  scale_fill_manual(values = c("Unknow" = "#FDE0DF", "No" = "#F67D79", "Yes" = "#B21F1A")) +
  labs(title = "Bar Chart of Nature Variables",
       x = NULL, y = "Percentage", fill = "Response") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Awareness about own produced noise

GGB %>%
  tabyl(bewustzijn_eigen_geluid_cat) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

bewustzijn_eigen_geluid_cat

n

percent

Always

3,648

0.35983429

Often

4,494

0.44328270

Sometimes

1,378

0.13592425

Rarely

468

0.04616295

Never

150

0.01479582

Total

10,138

1.00000000

III. Participants’ homes

Bedroom floor

GGB %>%
  tabyl(floor_bedroom) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

floor_bedroom

n

percent

Ondergronds

11

0.001085027

Gelijkvloers

1,128

0.111264549

1ste verdieping

6,168

0.608404024

2de verdieping

1,702

0.167883212

3de verdieping

453

0.044683370

4de verdieping

187

0.018445453

5de verdieping

103

0.010159795

6de verdieping

49

0.004833300

7de verdieping

41

0.004044190

8de verdieping

0

0.000000000

Hoger dan 8ste verdieping

75

0.007397909

Unknown

221

0.021799171

Total

10,138

1.000000000

Bedroom window

GGB %>%
  tabyl(window_street) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

window_street

n

percent

Yes

4,623

0.456007102

No

5,430

0.535608601

Unknown

85

0.008384297

Total

10,138

1.000000000

Bedroom window at night

GGB %>%
  tabyl(window_night_num) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

window_night_num

n

percent

Always closed

3,122

0.3079503

Mostly closed

3,444

0.3397120

Mostly open

2,433

0.2399882

Always open

1,139

0.1123496

Total

10,138

1.0000000

Environmental noise affect choice to close windows or window grilles at night

GGB %>%
  tabyl(q8_7) %>%
  adorn_totals() %>%
  setNames(c("Noise influence on closing windows at night", "n", "percent")) %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

Noise influence on closing windows at night

n

percent

Yes, this is the main reason

5,477

0.5402446

Yes, but due to other reasons

1,980

0.1953048

No

2,681

0.2644506

Total

10,138

1.0000000

IV. Participants’ sensitivity to sound

Distribution of Hyperacusis

hist(GGB$total_hyperacusis,
     col = "#F67D79",
     main = "Distribution of Hyperacusis",
     xlab = "Total of Hyperacusis")

Distribution of Hyperacusis

hist(GGB$wns_sum_score,
     col = "#F67D79",
     main = "Distribution of WNS",
     xlab = "Total score of WNS")

V. Sleep quality

Sleep Duration (Day vs. Night) _ Barchart

table(is.na(GGB$sleep_duration_day))
## 
## FALSE 
## 10138
# Reshape the data to long format
sleep_data <- GGB %>%
  select(sleep_duration_day, sleep_duration_night) %>%
  pivot_longer(cols = everything(), names_to = "Period", values_to = "Duration") %>%
  group_by(Period, Duration) %>%
  dplyr::summarise(n = n(), .groups = "drop") %>%
  group_by(Period) %>%
  mutate(percent = n / sum(n) * 100)

# Plot
ggplot(sleep_data, aes(x = Period, y = percent, fill = Duration)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(label = ifelse(percent > 5, paste0(round(percent, 1), "%"), "")),
            position = position_stack(vjust = 0.5),
            size = 3, color = "white") +
   coord_flip() +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#ffa9a9", "#F67D79", "#F72C26", "#B21F1A","#7E2811","#780E00"  )) +
  labs(title = "Distribution of Sleep Duration (Day vs. Night)",
       x = NULL, y = "Percentage", fill = "Sleep Duration") +
  theme_minimal(base_size = 13)

Sleep Duration (Day vs. Night)_Table

# Define all possible 8 categories
categories <- sort(unique(na.omit(c(GGB$sleep_duration_day, GGB$sleep_duration_night))))

# Calculate frequencies for each variable
day_tab <- GGB %>%
  tabyl(sleep_duration_day) %>%
  adorn_totals("row") %>%
  dplyr::rename(category = sleep_duration_day,
         n_day = n, 
         percent_day = percent)

night_tab <- GGB %>%
  tabyl(sleep_duration_night) %>%
  adorn_totals("row") %>%
  dplyr::rename(category = sleep_duration_night,
         n_night = n, percent_night = percent)

# Merge into one table
full_join(day_tab, night_tab, by = "category") %>%
  filter(category != "Total") %>%   # remove total if not needed
  mutate(
    percent_day = round(percent_day * 100, 2),
    percent_night = round(percent_night * 100, 2)) %>% 
  flextable() %>%
  theme_vanilla() %>%
  autofit()

category

n_day

percent_day

n_night

percent_night

0-2 uur

2,283

22.52

10

0.10

10-12 uur

5

0.05

55

0.54

2-4 uur

88

0.87

81

0.80

4-6 uur

56

0.55

1,412

13.93

6-8 uur

211

2.08

6,820

67.27

8-10 uur

65

0.64

1,744

17.20

>12 uur

1

0.01

6

0.06

Niet

7,429

73.28

10

0.10

Time Taken to Fall Asleep

table(GGB$fall_asleep)
## 
##   >120 min   0-15 min  16-30 min  31-45 min  46-60 min 61-120 min 
##         76       4177       3445       1429        669        342
ggplot(GGB, aes(x = "Fall asleep", fill = fall_asleep)) +
  geom_bar(position = "fill", width = 0.5) +
  geom_text(stat = "count", aes(label = paste0(round((..count..) / sum(..count..) * 100, 1), "%")),
            position = position_fill(vjust = 0.5), color = "white", size = 4) +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#ffa9a9", "#F67D79", "#F72C26", "#B21F1A","#7E2811","#780E00"  )) +
  labs(x = NULL, y = "Percentage", fill = "Time to fall asleep",
       title = "Time Taken to Fall Asleep") +
  theme_minimal(base_size = 14)
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Awaken by disturbed

sleep_disturb <- c("sleep_disturb_road", "sleep_disturb_train", "sleep_disturb_tram", "sleep_disturb_air", "sleep_disturb_neighbor")

# Recode 
GGB[sleep_disturb] <- lapply(GGB[sleep_disturb], function(x) {
  factor(case_when(
    x == 1 ~ "Nooit",
    x == 2 ~ "Minder dan jaarlijks ",
    x == 3 ~ "Een of meerdere keren per jaar, maar niet maandelijks ",
    x == 4 ~ "Een of meerdere keren per maand, maar niet wekelijks ",
    x == 5 ~ "Een of meerdere keren per week, maar niet dagelijks ",
    x == 6 ~ "Elke nacht",
    TRUE ~ "Unknown"
  ), levels = c("Unknown", "Nooit", "Minder dan jaarlijks ", "Een of meerdere keren per week, maar niet dagelijks ","Een of meerdere keren per maand, maar niet wekelijks ", "Een of meerdere keren per jaar, maar niet maandelijks ", "Elke nacht"))
})

# Prepare data for plotting
sleep_disturb_data <- GGB[sleep_disturb] %>%
  mutate_all(as.factor) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
  group_by(Variable, Response) %>%
  dplyr::summarise(n = n(), .groups = "drop") %>%
  group_by(Variable) %>%
  mutate(percent = n / sum(n) * 100)

# Plot 100% stacked bar chart
ggplot(sleep_disturb_data, aes(x = Variable, y = percent, fill = Response)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(label = ifelse(Response == "Yes", paste0(round(percent, 1), "%"), "")),
            position = position_stack(vjust = 1),
            color = "white", size = 3.5, fontface = "bold") +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A","#7E2811","#780E00"  )) +
  labs(title = "Bar Chart of Nature Variables",
       x = NULL, y = "Percentage", fill = "Response") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Snoring –> couldn’t find the variable

ESS

Gender

ggplot(GGB, aes(x = gender, y = total_ess, fill = gender)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format", 
                     label.x = 4.45,
                     label.y = 5.5) + # Add test result
   scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
  labs(
    title = "Distribution of Total ESS by Gender",
    x = "Gender",
    y = "Total ESS Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

Based on the one-way ANOVA (p = 0.99), there is no evidence of a statistically significant difference in total ESS scores across gender group

Age group

ggplot(GGB, aes(x = age_blv, y = total_ess, fill = age_blv)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 4.45,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
  labs(
    title = "Distribution of Total ESS by Age Group",
    x = "Age group",
    y = "Total ESS Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total ESS scores across age group

Income

ggplot(GGB %>% filter(!is.na(income_binary)), aes(x = income_binary, y = total_ess, fill = income_binary)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "wilcox.test", label = "p.format",
                     label.x = 2.45,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c( "#F67D79", "#780E00"))+
  labs(
    title = "Distribution of Total ESS by Income",
    x = "Income",
    y = "Total ESS Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

Based on the Wilcoxon test (p = 0.72), there is no evidence of a statistically significant difference in total ESS scores across income group

Dwelling Type

ggplot(GGB, aes(x = dwelling_type, y = total_ess, fill = dwelling_type)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 6.40,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A","#7E2811" ))+
  labs(
    title = "Distribution of Total ESS by Dwelling Type",
    x = "Dwelling Type",
    y = "Total ESS Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

Based on the anova test (p = 0.35), there is no evidence of a statistically significant difference in total ESS scores across dwelling type.

Employment

table(GGB$employment)
## 
##                                                         Anders 
##                                                            389 
##                Ik ben arbeidsongeschikt (ziekte, invaliditeit) 
##                                                            536 
##  Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen) 
##                                                           2307 
##                                    Ik ben huisvrouw of huisman 
##                                                            126 
##                                     Ik ben scholier of student 
##                                                            203 
##                                             Ik ben werkzoekend 
##                                                            127 
## Ik werk als zelfstandige, zelfstandig helper of als ondernemer 
##                                                            872 
##                                Ik werk deeltijds in loondienst 
##                                                           1422 
##                                 Ik werk voltijds in loondienst 
##                                                           4156
ggplot(GGB, aes(x = employment, y = total_ess, fill = employment)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 9.35,
                     label.y = 4.5) + # Add test result
  labs(
    title = "Distribution of Total ESS by Employment",
    x = "Employment",
    y = "Total ESS Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

FAS

Gender

ggplot(GGB, aes(x = gender, y = total_fas, fill = gender)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format", 
                     label.x = 4.45,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
  labs(
    title = "Distribution of Total fas by Gender",
    x = "Gender",
    y = "Total fas Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total fas scores across gender group

Age group

ggplot(GGB, aes(x = age_blv, y = total_fas, fill = age_blv)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 4.45,
                     label.y = 7.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
  labs(
    title = "Distribution of Total fas by Age Group",
    x = "Age group",
    y = "Total fas Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total fas scores across age group

Income

ggplot(GGB %>% filter(!is.na(income_binary)), aes(x = income_binary, y = total_fas, fill = income_binary)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "t.test", label = "p.format",
                     label.x = 2.45,
                     label.y = 7.5) + # Add test result
  scale_fill_manual(values = c( "#F67D79", "#780E00"))+
  labs(
    title = "Distribution of Total fas by Income",
    x = "Income",
    y = "Total fas Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

Based on the Wilcoxon test (p < 0.001), there is a statistically significant difference in total fas scores across income group

Dwelling Type

ggplot(GGB, aes(x = dwelling_type, y = total_fas, fill = dwelling_type)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 6.40,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A","#7E2811" ))+
  labs(
    title = "Distribution of Total fas by Dwelling Type",
    x = "Dwelling Type",
    y = "Total fas Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total fas scores across dwelling type.

Employment

table(GGB$employment)
## 
##                                                         Anders 
##                                                            389 
##                Ik ben arbeidsongeschikt (ziekte, invaliditeit) 
##                                                            536 
##  Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen) 
##                                                           2307 
##                                    Ik ben huisvrouw of huisman 
##                                                            126 
##                                     Ik ben scholier of student 
##                                                            203 
##                                             Ik ben werkzoekend 
##                                                            127 
## Ik werk als zelfstandige, zelfstandig helper of als ondernemer 
##                                                            872 
##                                Ik werk deeltijds in loondienst 
##                                                           1422 
##                                 Ik werk voltijds in loondienst 
##                                                           4156
ggplot(GGB, aes(x = employment, y = total_fas, fill = employment)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 9.35,
                     label.y = 4.5) + # Add test result
  labs(
    title = "Distribution of Total fas by Employment",
    x = "Employment",
    y = "Total fas Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

VI. Overall health

stress

Gender

ggplot(GGB, aes(x = gender, y = pss_total, fill = gender)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format", 
                     label.x = 4.45,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
  labs(
    title = "Distribution of Total stress by Gender",
    x = "Gender",
    y = "Total stress Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total stress scores across gender group

Age group

ggplot(GGB, aes(x = age_blv, y = pss_total, fill = age_blv)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 4.45,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
  labs(
    title = "Distribution of Total stress by Age Group",
    x = "Age group",
    y = "Total stress Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total stress scores across age group

Income

ggplot(GGB %>% filter(!is.na(income_binary)), aes(x = income_binary, y = pss_total, fill = income_binary)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "wilcox.test", label = "p.format",
                     label.x = 2.45,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c( "#F67D79", "#780E00"))+
  labs(
    title = "Distribution of Total stress by Income",
    x = "Income",
    y = "Total stress Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

Based on the Wilcoxon (p < 0.001), there is a statistically significant difference in total stress scores across income group

Dwelling Type

ggplot(GGB, aes(x = dwelling_type, y = pss_total, fill = dwelling_type)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 6.40,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A","#7E2811" ))+
  labs(
    title = "Distribution of Total stress by Dwelling Type",
    x = "Dwelling Type",
    y = "Total stress Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

Based on the anova test (p < 0.001), there is a statistically significant difference in total stress scores across dwelling type.

Employment

table(GGB$employment)
## 
##                                                         Anders 
##                                                            389 
##                Ik ben arbeidsongeschikt (ziekte, invaliditeit) 
##                                                            536 
##  Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen) 
##                                                           2307 
##                                    Ik ben huisvrouw of huisman 
##                                                            126 
##                                     Ik ben scholier of student 
##                                                            203 
##                                             Ik ben werkzoekend 
##                                                            127 
## Ik werk als zelfstandige, zelfstandig helper of als ondernemer 
##                                                            872 
##                                Ik werk deeltijds in loondienst 
##                                                           1422 
##                                 Ik werk voltijds in loondienst 
##                                                           4156
ggplot(GGB, aes(x = employment, y = pss_total, fill = employment)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 9.35,
                     label.y = 4.5) + # Add test result
  labs(
    title = "Distribution of Total stress by Employment",
    x = "Employment",
    y = "Total stress Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none") 

General physical health status

GGB %>%
  tabyl(q4_2) %>%
  adorn_totals() %>%
  setNames(c("General physical health status", "n", "percent")) %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

General physical health status

n

percent

Heel goed

1,471

0.145097652

Eerder goed

4,767

0.470211087

Gemiddeld

3,033

0.299171434

Eerder slecht

810

0.079897416

Heel slecht

57

0.005622411

Total

10,138

1.000000000

Physical complaints

physical_complaints <- c("q4_3_1", "q4_3_2", "q4_3_3", "q4_3_4", "q4_3_5", "q4_3_6", "q4_3_7", "q4_3_8")

# Recode 
GGB[physical_complaints] <- lapply(GGB[physical_complaints], function(x) {
  factor(case_when(
    x == 1 ~ "Nooit",
    x == 2 ~ "Een paar keer per jaar",
    x == 3 ~ "Een paar keer per maand",
    x == 4 ~ "Een paar keer per week",
    x == 5 ~ "Dagelijks"
  ), levels = c("Nooit", "Een paar keer per jaar", "Een paar keer per maand","Een paar keer per week", "Dagelijks"))
})

# Prepare data for plotting
physical_complaints_data <- GGB[physical_complaints] %>%
  mutate_all(as.factor) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
  group_by(Variable, Response) %>%
  dplyr::summarise(n = n(), .groups = "drop") %>%
  group_by(Variable) %>%
  mutate(percent = n / sum(n) * 100)

# Relabel variable names
physical_complaints_data <- physical_complaints_data %>%
  mutate(Variable = revalue(Variable,
    c("q4_3_1" = "Hoofdpijn",
    "q4_3_2" = "Duizeligheid",
    "q4_3_3" = "Hartkloppingen",
    "q4_3_4" = "Maag- en darmklachten",
    "q4_3_5" = "Slaperigheid",
    "q4_3_6" = "Vermoeidheid",
    "q4_3_7" = "Slapeloosheid",
    "q4_3_8" = "Psychische stoornissen"
  )))
## The following `from` values were not present in `x`: q4_3_2, q4_3_3, q4_3_4, q4_3_5, q4_3_6, q4_3_7, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_3, q4_3_4, q4_3_5, q4_3_6, q4_3_7, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_4, q4_3_5, q4_3_6, q4_3_7, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_5, q4_3_6, q4_3_7, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_4, q4_3_6, q4_3_7, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_4, q4_3_5, q4_3_7, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_4, q4_3_5, q4_3_6, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_4, q4_3_5, q4_3_6, q4_3_7
# Plot 100% stacked bar chart
ggplot(physical_complaints_data, aes(x = Variable, y = percent, fill = Response)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(label = ifelse(Response == "Yes", paste0(round(percent, 1), "%"), "")),
            position = position_stack(vjust = 1),
            color = "white", size = 3.5, fontface = "bold") +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F72C26", "#B21F1A","#7E2811"  )) +
  labs(title = "Bar Chart of Physical complaints",
       x = NULL, y = "Percentage", fill = "Response") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Chornic condition

freq <- table(GGB$q4_4)
pct <- round(100 * freq / sum(freq), 1)
labels <- paste0(names(freq), ": ", freq, " (", pct, "%)")
pie(freq, labels = labels, main = "Pie chart of chronic condition", col= c("#FDE0DF", "#F72C26"))

Chronic disorders

chronic_disorders <- c("q4_5_1", "q4_5_2", "q4_5_3", "q4_5_4", "q4_5_5", "q4_5_6", "q4_5_7")

# Prepare data for plotting
chronic_disorders_data <-  GGB %>%
  select(all_of(chronic_disorders)) %>%
  pivot_longer(cols = everything(), names_to = "variable", values_to = "value") %>%  
  filter(value == 1) 

summary_chronic_disorders <- aggregate(value == 1 ~ variable, data = chronic_disorders_data , sum)


# Relabel variable names
names(summary_chronic_disorders)[2] <- "frequency"
summary_chronic_disorders <- summary_chronic_disorders %>%
  mutate(variable = revalue(variable,
    c("q4_5_1" = "Aandoeningen van het ademhalingsstelsel 
      (bijvoorbeeld astma, COPD)",
    "q4_5_2" = "Aandoeningen van het spijsverteringsstelsel
    (bijvoorbeeld ziekte van Crohn, levercirrose)",
    "q4_5_3" = "Hart- en vaataandoeningen 
    (bijvoorbeeld hoge bloeddruk, hartritmestoornissen)",
    "q4_5_4" = "Neurologische en psychische aandoeningen 
    (bijvoorbeeld epilepsie, multiple sclerose)",
    "q4_5_5" = "Metabole aandoeningen 
    (bijvoorbeeld diabetes, obesitas)",
    "q4_5_6" = "Andere",
    "q4_5_7" = "Slaapapneu"
  ))) %>% 
  mutate(variable = reorder(variable, frequency))


# Plot bar chart
ggplot(summary_chronic_disorders, aes(x = variable, y = frequency)) +
  geom_bar(stat = "identity", width = 0.7, fill = "red") +
  geom_text(aes(label = frequency), hjust = -0.1, size = 4) +
  coord_flip()+
  labs(title = "Bar Chart of Chronic disorders",
       x = NULL, y = "Frequency") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
         plot.title = element_text(hjust = 1))+ 
  expand_limits(y = max(summary_chronic_disorders$frequency) * 1.1)

Allergic condition

freq_4_6 <- table(GGB$q4_6)
pct_4_6 <- round(100 * freq_4_6 / sum(freq_4_6), 1)
labels_4_6 <- paste0(names(freq_4_6), ": ", freq_4_6, " (", pct_4_6, "%)")
pie(freq_4_6, labels = labels_4_6, main = "Pie chart of allergic condition", col= c("#FDE0DF", "#F72C26"))

Allergic disorders

ggplot(GGB %>% filter(!is.na(q4_7)), aes(x=q4_7)) +
  geom_bar(width = 0.7, fill = "red")+
  coord_flip()+
  labs(title = "Bar graph of allergic disorders",
    x = "Allergic disorders")+
  theme(axis.text.x = element_text(hjust = 1),
         plot.title = element_text(hjust = 1))

Respiratory infections

ggplot(data = (GGB %>%
         filter(!is.na(q4_8)) %>%
         filter(q4_8 >= quantile(q4_8, 0.25) - 1.5 * IQR(q4_8),
                q4_8 <= quantile(q4_8, 0.75) + 1.5 * IQR(q4_8))),
       aes(x = q4_8)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black") +
  labs(title = "Histogram of Respiratory infections",
       x = "Respiratory infections (times)", y = "Count") +
  theme_minimal()

Smoking

GGB %>%
    mutate(q4_9 = case_when(
    q4_9 == 1 ~ "Yes", 
    q4_9 == 2 ~ "No"
  )) %>% 
  tabyl(q4_9) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

q4_9

n

percent

No

9,287

0.91605839

Yes

851

0.08394161

Total

10,138

1.00000000

Moderate to intense exercise

ggplot(data = (GGB %>%
         filter(!is.na(q4_13)) %>%
         filter(q4_13 >= quantile(q4_13, 0.25) - 1.5 * IQR(q4_13),
                q4_13 <= quantile(q4_13, 0.75) + 1.5 * IQR(q4_13))),
       aes(x = q4_13)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black") +
  labs(title = "Histogram of Moderate to intense exercise",
       x = "Moderate to intense exercise (hours)", y = "Count") +
  theme_minimal()

Time in outdoor environment

ggplot(data = (GGB %>%
         filter(!is.na(q4_14)) %>%
         filter(q4_14 >= quantile(q4_14, 0.25) - 1.5 * IQR(q4_14),
                q4_14 <= quantile(q4_14, 0.75) + 1.5 * IQR(q4_14))),
       aes(x = q4_14)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black") +
  labs(title = "Histogram of Time in outdoor environment ",
       x = "Time in outdoor environment (hours)", y = "Count") +
  theme_minimal()

Time in using smartphone

ggplot(data = (GGB %>%
         filter(!is.na(smartphone_use)) %>%
         filter(smartphone_use >= quantile(smartphone_use, 0.25) - 1.5 * IQR(smartphone_use),
                smartphone_use <= quantile(smartphone_use, 0.75) + 1.5 * IQR(smartphone_use))),
       aes(x = smartphone_use)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black") +
  labs(title = "Histogram of Time in using smartphone",
       x = "Time in using smartphone (hours)", y = "Count") +
  theme_minimal()

Driving frequency

GGB %>%
  tabyl(q4_16) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

q4_16

n

percent

Nooit (al dan niet in het bezit van een rijbewijs)

1,016

0.10021701

Minder dan jaarlijks

122

0.01203393

Een of meerdere keren per jaar, maar niet maandelijks

490

0.04833300

Een of meerdere keren per maand, maar niet wekelijks

1,549

0.15279148

Een of meerdere keren per week, maar niet dagelijks

4,421

0.43608207

Dagelijks

2,540

0.25054251

Total

10,138

1.00000000

VII. Participants’ hearing status

Hearing status

# Prepare data for plotting
hearing_status_data <- GGB %>%
  select(c("q7_2_1", "q7_2_2")) %>%
  mutate_all(as.factor) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
  group_by(Variable, Response) %>%
  dplyr::summarise(n = n(), .groups = "drop") %>%
  group_by(Variable) %>%
  mutate(percent = n / sum(n) * 100) %>% 
  mutate(Variable = case_when(
    Variable == "q7_2_1" ~ "Right ear",
    Variable == "q7_2_2" ~ "Left ear"
  ))

# Plot 100% stacked bar chart
ggplot(hearing_status_data, aes(x = Variable, y = percent, fill = Response)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(label = ifelse(Response == "Yes", paste0(round(percent, 1), "%"), "")),
            position = position_stack(vjust = 1),
            color = "white", size = 3.5, fontface = "bold") +
  coord_flip()+
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F72C26", "#7E2811"  )) +
  labs(title = "Bar Chart of Hearing Status",
       x = NULL, y = "Percentage", fill = "Response") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(hjust = 1),
        legend.position = "bottom")+
  guides(fill = guide_legend(nrow = 4))

Hearing problem

GGB %>%
  tabyl(q7_3) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

q7_3

n

percent

Ik heb nooit gehoorproblemen

6,520

0.6431249

Ik heb een (licht) gehoorprobleem of twijfel of ik er een heb

3,618

0.3568751

Total

10,138

1.0000000

Tinnitus

GGB %>%
  tabyl(q7_5) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

q7_5

n

percent

Ja

2,660

0.26237917

Ik weet het niet

640

0.06312882

Nee

6,838

0.67449201

Total

10,138

1.00000000