Before analyses

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.4.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.4.3

## Warning: package 'purrr' was built under R version 4.4.3

## Warning: package 'lubridate' was built under R version 4.4.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.2     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(janitor)

## Warning: package 'janitor' was built under R version 4.4.3

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(flextable)

## Warning: package 'flextable' was built under R version 4.4.3

## 
## Attaching package: 'flextable'
## 
## The following object is masked from 'package:purrr':
## 
##     compose

library(rlang)

## Warning: package 'rlang' was built under R version 4.4.3

## 
## Attaching package: 'rlang'
## 
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice

library(stringr)
library(gtsummary)

## Warning: package 'gtsummary' was built under R version 4.4.3

## 
## Attaching package: 'gtsummary'
## 
## The following object is masked from 'package:flextable':
## 
##     continuous_summary

library(rstatix)

## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:janitor':
## 
##     make_clean_names
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(ggplot2)
library(plyr)

## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## 
## The following objects are masked from 'package:rstatix':
## 
##     desc, mutate
## 
## The following object is masked from 'package:gtsummary':
## 
##     mutate
## 
## The following object is masked from 'package:purrr':
## 
##     compact
## 
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

library(ggpubr)

## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:plyr':
## 
##     mutate
## 
## The following objects are masked from 'package:flextable':
## 
##     border, font, rotate

#data <- read.csv("2025-07-10_GGB_Merged_PERSONALINFO_NewGGBParticipants_WITHGPS.csv")
GGB <- read.csv("C:/Users/thuyb/OneDrive/Desktop/Student Job/Dataset/2025-07-10_GGB_Merged_PERSONALINFO_NewGGBParticipants_WITHGPS.csv")
#labels <- read.csv("/Users/forllychavez/Library/Mobile Documents/com~apple~CloudDocs/Desktop/Escritorio - MacBook Air de Forlly/Master of Epidemiology/2nd Year/Student Job 2.1/Documents to read/Analyses GGB Forlly Romero/Dataset/GGB labels.csv")

GGB <- GGB %>% 
  clean_names()

#Age categories based on blv

GGB<- GGB %>% 
  mutate(
    age_blv = case_when(age %in% c(17:30) ~ '17-30',
                         age %in% c(31:45) ~ '31-45',
                         age %in% c(46:60) ~ '46-60',
                         age %in% c(61:92) ~ '61+'
                         ) %>% factor(levels = c('17-30', '31-45', '46-60','61+' )))

#Gender
GGB <- GGB %>%
  mutate(gender = factor(gender, 
                            levels = c("Man", 
                                       "Vrouw", 
                                       "Niet-binaire geslachtsidentiteit", 
                                       "Ik antwoord liever niet"),
                            ordered = TRUE))

# Income
GGB <- GGB %>%
  mutate(
    income = case_when(
      income %in% c("0 – 1.000 € per maand",
                    "1.001 – 2.000 € per maand",
                    "2.001 – 3.000 € per maand",
                    "3.001 – 4.000 € per maand",
                    "4.001 – 5.000 € per maand",
                    "5.001 – 6.000 € per maand",
                    "meer dan 6.000 € per maand",
                    "Ik antwoord liever niet") ~ income,
      TRUE ~ "Unknown"  # Everything else becomes "Unknown"
    ),
    income = factor(income,
                    levels = c("0 – 1.000 € per maand",
                               "1.001 – 2.000 € per maand",
                               "2.001 – 3.000 € per maand",
                               "3.001 – 4.000 € per maand",
                               "4.001 – 5.000 € per maand",
                               "5.001 – 6.000 € per maand",
                               "meer dan 6.000 € per maand",
                               "Ik antwoord liever niet",
                               "Unknown"),
                    ordered = TRUE)
  )

  
# Income binary 
GGB <- GGB %>%
  mutate(income_binary = case_when(
    income %in% c("0 – 1.000 € per maand",
                  "1.001 – 2.000 € per maand",
                  "2.001 – 3.000 € per maand") ~ "≤3000",
    income %in% c("3.001 – 4.000 € per maand",
                  "4.001 – 5.000 € per maand",
                  "5.001 – 6.000 € per maand",
                  "meer dan 6.000 € per maand") ~ ">3000",
    income == "Ik antwoord liever niet" ~ NA_character_,
    TRUE ~ NA_character_
  ))

# Education
GGB <- GGB %>%
  mutate(education = factor(education, 
                            levels = c("Geen", 
                                       "Lager onderwijs", 
                                       "Lager secundair onderwijs (getuigschrift 4de jaar)", 
                                       "Hoger secundair onderwijs",
                                       "Niet-universitair hoger onderwijs / bachelor",
                                       "Universitair onderwijs / master / doctoraat",
                                       "Ik antwoord liever niet / Geen antwoord"),
                            ordered = TRUE))

# Nationality 
GGB <- GGB %>% 
  mutate(nationality_belgian = as.factor(nationality_belgian)) %>%
  mutate(nationality = case_when(
  nationality_belgian == 1 ~ "Belgian National",
  is.na(nationality_belgian) ~ "Not Belgian National",
  TRUE ~"Not Belgian National"
))

# Noise annoyance
GGB <- GGB %>% 
  mutate(
    noise_annoyance_cat = case_when(
      noise_annoyance == 1  ~ 'Helemaal niet gehinderd',
      noise_annoyance == 2  ~ 'Niet gehinderd',
      noise_annoyance == 3  ~ 'Tamelijk gehinderd',
      noise_annoyance == 4  ~ 'Ernstig gehinderd',
      noise_annoyance == 5  ~ 'Extreem gehinderd'
    ))

# Awareness about own produced noise
GGB <- GGB %>% 
  mutate(
    bewustzijn_eigen_geluid_cat = factor(case_when(
      bewustzijn_eigen_geluid == 1  ~ 'Always',
      bewustzijn_eigen_geluid == 2  ~ 'Often',
      bewustzijn_eigen_geluid == 3  ~ 'Sometimes',
      bewustzijn_eigen_geluid == 4  ~ 'Rarely',
      bewustzijn_eigen_geluid == 5  ~ 'Never'),
      levels = c('Always', 'Often', 'Sometimes', 'Rarely', 'Never')))

# QUality of life 
GGB <- GGB %>% 
  mutate(
    quality_life = case_when( 
      gen_satisfaction == 1 ~ 'Zeer tevreden',
      gen_satisfaction == 2 ~ 'Tevreden',
      gen_satisfaction == 3 ~ 'Noch tevreden, noch ontevreden',
      gen_satisfaction == 4 ~ 'Niet tevreden',
      gen_satisfaction == 5 ~ 'Helemaal niet tevreden'
  ))

# Bedroom floor
GGB <- GGB %>%
  mutate(
    floor_bedroom = case_when(
      floor_bedroom %in% c("Ondergronds",
                    "Gelijkvloers",
                    "1ste verdieping",
                    "2de verdieping",
                    "3de verdieping",
                    "4de verdieping",
                    "5de verdieping",
                    "6de verdieping",
                    "7de verdieping",
                    "8de verdieping",
                    "Hoger dan 8ste verdieping") ~ floor_bedroom,
      TRUE ~ "Unknown"  # Everything else becomes "Unknown"
    ),
    floor_bedroom = factor(floor_bedroom,
                    levels = c("Ondergronds",
                    "Gelijkvloers",
                    "1ste verdieping",
                    "2de verdieping",
                    "3de verdieping",
                    "4de verdieping",
                    "5de verdieping",
                    "6de verdieping",
                    "7de verdieping",
                    "8de verdieping",
                    "Hoger dan 8ste verdieping",
                    "Unknown"),
                    ordered = TRUE)
  )

# Window street 
GGB <- GGB %>% 
  mutate(
    window_street = factor(case_when(
      window_street == 1 ~ "Yes",
      window_street == 0 ~ "No",
      TRUE ~ "Unknown"),
      levels = c("Yes", "No", "Unknown")))

# Bedroom windows at night
GGB <- GGB %>% 
  mutate(
    window_night_num = factor(case_when(
      window_night_num == 5 ~ "Always closed",
      window_night_num == 6 ~ "Mostly closed",
      window_night_num == 7 ~ "Mostly open",
      window_night_num == 8 ~ "Always open"),
      levels = c("Always closed", "Mostly closed", "Mostly open", "Always open")))

# Environmental noise affect your choice to close windows or window grilles at night
GGB <- GGB %>% 
  mutate(
    q8_7 = factor(case_when(
      q8_7 == 5 ~ "Yes, this is the main reason",
      q8_7 == 6 ~ "Yes, but due to other reasons",
      q8_7 == 7 ~ "No"),
      levels = c("Yes, this is the main reason", "Yes, but due to other reasons", "No")))

# General physical health status
GGB <- GGB %>% 
  mutate(
    q4_2 = factor(case_when(
      q4_2 == 1 ~ "Heel goed",
      q4_2 == 2 ~ "Eerder goed",
      q4_2 == 3 ~ "Gemiddeld",
      q4_2 == 4 ~ "Eerder slecht",
      q4_2 == 5 ~ "Heel slecht"),
      levels = c("Heel goed", "Eerder goed", "Gemiddeld", "Eerder slecht", "Heel slecht")))

# Chronic disorders
GGB <- GGB %>% 
  mutate(q4_4 = case_when(
    q4_4 == 1 ~ "Yes", 
    q4_4 == 2 ~ "No"
  ))

# Allergic disorders
GGB <- GGB %>% 
  mutate(q4_6 = case_when(
    q4_6 == 1 ~ "Yes", 
    q4_6 == 2 ~ "No"
  ))

GGB <- GGB %>% 
  mutate(q4_7 = case_when(
    q4_7 == 1 ~ "Allergische astma", 
    q4_7 == 2 ~ "Inhalatieallergie 
    (hooikoorts/pollen, huisstofmijt, dieren)",
    q4_7 == 3 ~ "Voedselallergie",
    q4_7 == 4 ~ "Eczeem, atopische dermatitis",
    q4_7 == 5 ~ "Andere"
  ))

# Driving frequency 
GGB <-GGB %>%
    mutate(q4_16 = case_when(
    q4_16 == 1 ~ "Nooit (al dan niet in het bezit van een rijbewijs)",
    q4_16 == 2 ~ "Minder dan jaarlijks",
    q4_16 == 3 ~ "Een of meerdere keren per jaar, maar niet maandelijks",
    q4_16 == 4 ~ "Een of meerdere keren per maand, maar niet wekelijks",
    q4_16 == 5 ~ "Een of meerdere keren per week, maar niet dagelijks",
    q4_16 == 6 ~ "Dagelijks"),
    q4_16 = factor(q4_16,
                     level = c("Nooit (al dan niet in het bezit van een rijbewijs)", "Minder dan jaarlijks", "Een of meerdere keren per jaar, maar niet maandelijks", "Een of meerdere keren per maand, maar niet wekelijks", "Een of meerdere keren per week, maar niet dagelijks", "Dagelijks" )))

# Participants' hearing status
GGB <-GGB %>% # right ear
    mutate(q7_2_1 = factor(case_when(
    q7_2_1 == 1 ~ "Normaal gehoor",
    q7_2_1 == 2 ~ "Verminderd of geen gehoor, maar zonder gebruik van hoortoestel/implantaat",
    q7_2_1 == 3 ~ "Met hoortoestel/ gehoorapparaat",
    q7_2_1 == 4 ~ "Met implantaat (vb. cochleair implantaat, middenoorimplantaat)"),
    level = c("Normaal gehoor", "Verminderd of geen gehoor, maar zonder gebruik van hoortoestel/implantaat", "Met hoortoestel/ gehoorapparaat", "Met implantaat (vb. cochleair implantaat, middenoorimplantaat)" )))

GGB <-GGB %>% # left ear
    mutate(q7_2_2 = factor(case_when(
    q7_2_2 == 1 ~ "Normaal gehoor",
    q7_2_2 == 2 ~ "Verminderd of geen gehoor, maar zonder gebruik van hoortoestel/implantaat",
    q7_2_2 == 3 ~ "Met hoortoestel/ gehoorapparaat",
    q7_2_2 == 4 ~ "Met implantaat (vb. cochleair implantaat, middenoorimplantaat)"),
    level = c("Normaal gehoor", "Verminderd of geen gehoor, maar zonder gebruik van hoortoestel/implantaat", "Met hoortoestel/ gehoorapparaat", "Met implantaat (vb. cochleair implantaat, middenoorimplantaat)" )))

# Hearing problem
GGB <-GGB %>% 
    mutate(q7_3 = factor(case_when(
    q7_3 == 1 ~ "Ik heb nooit gehoorproblemen",
    q7_3 == 2 ~ "Ik heb een (licht) gehoorprobleem of twijfel of ik er een heb"),
    level = c("Ik heb nooit gehoorproblemen", "Ik heb een (licht) gehoorprobleem of twijfel of ik er een heb")))

# Tinnitus
GGB <-GGB %>%
    mutate(q7_5 = factor(case_when(
    q7_5 == 1 ~ "Ja",
    q7_5 == 2 ~ "Ik weet het niet",
    q7_5 == 3 ~ "Nee"),
    level = c("Ja", "Ik weet het niet", "Nee")))

I. Sociodemographics

Number of participant

GGB %>%
  mutate(stad_grouped= ifelse(stad %in% c("Antwerpen", "Gent", "Leuven"), stad, "Other")) %>%
  tabyl(stad_grouped) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

stad_grouped	n	percent
Antwerpen	1,759	0.17350562
Gent	1,317	0.12990728
Leuven	560	0.05523772
Other	6,502	0.64134938
Total	10,138	1.00000000

Participants per age and gender

 GGB %>%
  tabyl(age_blv, gender) %>%                       
  adorn_totals(where = c("col", "row"))%>% 
  flextable() %>%
  add_header_row(values = c("Leeftijdsklasse", "Aantal steekproefpersonen"), 
                 colwidths = c(1, 5)) %>%         
  theme_vanilla() %>% 
  align(align = 'center', part = 'all')

Leeftijdsklasse	Aantal steekproefpersonen
age_blv	Man	Vrouw	Niet-binaire geslachtsidentiteit	Ik antwoord liever niet	Total
17-30	248	506	8	12	774
31-45	1,095	1,733	12	23	2,863
46-60	1,358	1,884	8	20	3,270
61+	1,774	1,438	1	18	3,231
Total	4,475	5,561	29	73	10,138

Family composition

tibble(
  Statistic = c("Mean", "Median", "Min", "Max"),
  Cohabitants_Adults = c(
    round(mean(GGB$cohabitants_adults, na.rm = TRUE), 2),
    round(median(GGB$cohabitants_adults, na.rm = TRUE), 2),
    round(min(GGB$cohabitants_adults, na.rm = TRUE), 2),
    round(max(GGB$cohabitants_adults, na.rm = TRUE), 0)),
  Cohabitants_Children = c(
    round(mean(GGB$cohabitants_children, na.rm = TRUE), 2),
    round(median(GGB$cohabitants_children, na.rm = TRUE), 2),
    round(min(GGB$cohabitants_children, na.rm = TRUE), 2),
    round(max(GGB$cohabitants_children, na.rm = TRUE), 0)),
   `P-value (t-test)` = c(format(t.test(GGB$cohabitants_adults, GGB$cohabitants_children)$p.value, digits = 3), rep("", 3))  # Replicate p-value for each row
) %>% 
  flextable() %>%
  theme_vanilla() %>%
  align(align = 'center', part = 'all')

Statistic	Cohabitants_Adults	Cohabitants_Children	P-value (t-test)
Mean	2.08	0.54	0
Median	2.00	0.00
Min	0.00	0.00
Max	250.00	10.00

# Create a box plot for comparison -> doesn't work well
boxplot(GGB$cohabitants_adults, GGB$cohabitants_children, 
        names = c("cohabitants_adults", "cohabitants_children"),
        main = "Boxplot Comparison of cohabitants_adults and cohabitants_children",
        ylab = "Values",
        col = c("#FCB8B6", "#F72C26"),
        outline = FALSE)

Nationality

GGB %>%
  tabyl(nationality) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

nationality	n	percent
Belgian National	9,764	0.96310909
Not Belgian National	374	0.03689091
Total	10,138	1.00000000

Educational level

GGB %>%
  tabyl(education) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

education	n	percent	valid_percent
Geen	18	0.001775498	0.001794080
Lager onderwijs	54	0.005326494	0.005382239
Lager secundair onderwijs (getuigschrift 4de jaar)	283	0.027914776	0.028206917
Hoger secundair onderwijs	1,862	0.183665417	0.185587561
Niet-universitair hoger onderwijs / bachelor	3,608	0.355888735	0.359613276
Universitair onderwijs / master / doctoraat	4,208	0.415072006	0.419415927
Ik antwoord liever niet / Geen antwoord	0	0.000000000	0.000000000
	105	0.010357072
Total	10,138	1.000000000	1.000000000

Work situation

GGB %>%
  tabyl(employment) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

employment	n	percent
Anders	389	0.03837049
Ik ben arbeidsongeschikt (ziekte, invaliditeit)	536	0.05287039
Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen)	2,307	0.22755968
Ik ben huisvrouw of huisman	126	0.01242849
Ik ben scholier of student	203	0.02002367
Ik ben werkzoekend	127	0.01252713
Ik werk als zelfstandige, zelfstandig helper of als ondernemer	872	0.08601302
Ik werk deeltijds in loondienst	1,422	0.14026435
Ik werk voltijds in loondienst	4,156	0.40994279
Total	10,138	1.00000000

Income

GGB %>%
  tabyl(income) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

income	n	percent
0 – 1.000 € per maand	32	0.003156441
1.001 – 2.000 € per maand	783	0.077234168
2.001 – 3.000 € per maand	1,610	0.158808443
3.001 – 4.000 € per maand	1,674	0.165121326
4.001 – 5.000 € per maand	1,867	0.184158611
5.001 – 6.000 € per maand	1,309	0.129118169
meer dan 6.000 € per maand	1,045	0.103077530
Ik antwoord liever niet	1,616	0.159400276
Unknown	202	0.019925035
Total	10,138	1.000000000

Type of housing

GGB %>%
  tabyl(dwelling_type) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

dwelling_type	n	percent
Andere	225	0.02219373
Appartement/studio	2,132	0.21029789
Halfopen bebouwing	1,869	0.18435589
Open bebouwing	2,654	0.26178733
Rijwoning met tuin	2,862	0.28230420
Rijwoning zonder tuin	396	0.03906096
Total	10,138	1.00000000

Member of association

GGB %>%
  mutate(actiecomite_geluid = case_when(
    actiecomite_geluid == 29 ~ "Yes", 
    actiecomite_geluid == 30 ~ "No"
  )) %>% 
  tabyl(actiecomite_geluid) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

actiecomite_geluid	n	percent
No	9,856	0.97218386
Yes	282	0.02781614
Total	10,138	1.00000000

II. Participants’ quality of life and noise perception

Overall satisfaction

GGB %>%
  tabyl(quality_life) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

quality_life	n	percent
Helemaal niet tevreden	477	0.04705070
Niet tevreden	1,647	0.16245808
Noch tevreden, noch ontevreden	2,024	0.19964490
Tevreden	4,993	0.49250345
Zeer tevreden	997	0.09834287
Total	10,138	1.00000000

Annoyance from environmental noise

library(tidyverse)

# Create new data set with noise_annoyance and other variable, then reshape data
data3 <- GGB %>% 
  select(noise_annoyance, 
         employment,
         dwelling_type,
         gender,
         age_blv, 
         income_binary) %>% 
  gather('var', 'value', -noise_annoyance) %>% 
  mutate(noise_annoyance = factor(noise_annoyance, levels = 1:5, 
                                   labels = c('Helemaal niet gehinderd', 'Niet gehinderd', 
                                              'Tamelijk gehinderd', 'Ernstig gehinderd','Extreem gehinderd')))

## Warning: attributes are not identical across measure variables; they will be
## dropped

# vertical data
data_vertical <- data3 %>% filter(var %in% c("gender", "age_blv", "income_binary"))

# Vertical bar plot
ggplot(data_vertical, aes(x = value, fill = noise_annoyance)) +
  geom_bar(position = 'fill') +
  facet_wrap(~ var, scales = 'free_x') +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  ) +
  labs(
    x = "Value",
    y = "Proportion",
    title = "Noise Annoyance - Gender, Age, Income"
  )

# Employment bar plot
data_employment <- data3 %>% filter(var=="employment")
ggplot(data_employment, aes(x = value, fill = noise_annoyance)) +
  geom_bar(position = 'fill') +
  facet_wrap(~ var, scales = 'free_y') +
  coord_flip() +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A")) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 9),
    legend.title = element_blank(),
    legend.position = "bottom"
  ) +
  labs(
    x = "Value",
    y = "Proportion",
    title = "Noise Annoyance by Employment"
  )+
  guides(fill=guide_legend(nrow=3))

# Dwelling Type bar plot
data_dwelling <- data3 %>% filter(var=="dwelling_type")
ggplot(data_dwelling, aes(x = value, fill = noise_annoyance)) +
  geom_bar(position = 'fill') +
  facet_wrap(~ var, scales = 'free_y') +
  coord_flip() +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A")) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 9),
    legend.title = element_blank()
  ) +
  labs(
    x = "Value",
    y = "Proportion",
    title = "Noise Annoyance by Dwelling Type"
  )

Change in noise annoyance

GGB %>%
  select(noise_annoyance_cat,
         age_blv,income, nationality, dwelling_type, employment) %>% 
  tbl_summary(by = noise_annoyance_cat) %>% 
  add_p()

## The following errors were returned during `add_p()`:
## ✖ For variable `dwelling_type` (`noise_annoyance_cat`) and "estimate",
##   "p.value", "conf.low", and "conf.high" statistics: FEXACT error 5. The hash
##   table key cannot be computed because the largest key is larger than the
##   largest representable int. The algorithm cannot proceed. Reduce the
##   workspace, consider using 'simulate.p.value=TRUE' or another algorithm.
## ✖ For variable `employment` (`noise_annoyance_cat`) and "estimate", "p.value",
##   "conf.low", and "conf.high" statistics: FEXACT error 5. The hash table key
##   cannot be computed because the largest key is larger than the largest
##   representable int. The algorithm cannot proceed. Reduce the workspace,
##   consider using 'simulate.p.value=TRUE' or another algorithm.
## ✖ For variable `income` (`noise_annoyance_cat`) and "estimate", "p.value",
##   "conf.low", and "conf.high" statistics: FEXACT error 5. The hash table key
##   cannot be computed because the largest key is larger than the largest
##   representable int. The algorithm cannot proceed. Reduce the workspace,
##   consider using 'simulate.p.value=TRUE' or another algorithm.

Characteristic	Ernstig gehinderd N = 2,810¹	Extreem gehinderd N = 816¹	Helemaal niet gehinderd N = 219¹	Niet gehinderd N = 1,720¹	Tamelijk gehinderd N = 4,573¹	p-value²
age_blv						<0.001
17-30	122 (4.3%)	44 (5.4%)	22 (10%)	216 (13%)	370 (8.1%)
31-45	764 (27%)	234 (29%)	43 (20%)	481 (28%)	1,341 (29%)
46-60	991 (35%)	316 (39%)	58 (26%)	488 (28%)	1,417 (31%)
61+	933 (33%)	222 (27%)	96 (44%)	535 (31%)	1,445 (32%)
income
0 – 1.000 € per maand	9 (0.3%)	2 (0.2%)	1 (0.5%)	9 (0.5%)	11 (0.2%)
1.001 – 2.000 € per maand	235 (8.4%)	110 (13%)	17 (7.8%)	99 (5.8%)	322 (7.0%)
2.001 – 3.000 € per maand	505 (18%)	149 (18%)	38 (17%)	212 (12%)	706 (15%)
3.001 – 4.000 € per maand	486 (17%)	120 (15%)	32 (15%)	255 (15%)	781 (17%)
4.001 – 5.000 € per maand	478 (17%)	105 (13%)	34 (16%)	377 (22%)	873 (19%)
5.001 – 6.000 € per maand	311 (11%)	94 (12%)	26 (12%)	250 (15%)	628 (14%)
meer dan 6.000 € per maand	242 (8.6%)	61 (7.5%)	33 (15%)	218 (13%)	491 (11%)
Ik antwoord liever niet	478 (17%)	161 (20%)	36 (16%)	276 (16%)	665 (15%)
Unknown	66 (2.3%)	14 (1.7%)	2 (0.9%)	24 (1.4%)	96 (2.1%)
nationality						0.4
Belgian National	2,691 (96%)	785 (96%)	213 (97%)	1,659 (96%)	4,416 (97%)
Not Belgian National	119 (4.2%)	31 (3.8%)	6 (2.7%)	61 (3.5%)	157 (3.4%)
dwelling_type
Andere	65 (2.3%)	28 (3.4%)	4 (1.8%)	42 (2.4%)	86 (1.9%)
Appartement/studio	685 (24%)	220 (27%)	26 (12%)	257 (15%)	944 (21%)
Halfopen bebouwing	476 (17%)	137 (17%)	35 (16%)	340 (20%)	881 (19%)
Open bebouwing	641 (23%)	199 (24%)	123 (56%)	573 (33%)	1,118 (24%)
Rijwoning met tuin	835 (30%)	192 (24%)	29 (13%)	453 (26%)	1,353 (30%)
Rijwoning zonder tuin	108 (3.8%)	40 (4.9%)	2 (0.9%)	55 (3.2%)	191 (4.2%)
employment
Anders	118 (4.2%)	45 (5.5%)	7 (3.2%)	47 (2.7%)	172 (3.8%)
Ik ben arbeidsongeschikt (ziekte, invaliditeit)	181 (6.4%)	97 (12%)	7 (3.2%)	65 (3.8%)	186 (4.1%)
Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen)	644 (23%)	150 (18%)	70 (32%)	394 (23%)	1,049 (23%)
Ik ben huisvrouw of huisman	45 (1.6%)	11 (1.3%)	1 (0.5%)	16 (0.9%)	53 (1.2%)
Ik ben scholier of student	30 (1.1%)	12 (1.5%)	7 (3.2%)	63 (3.7%)	91 (2.0%)
Ik ben werkzoekend	41 (1.5%)	13 (1.6%)	2 (0.9%)	19 (1.1%)	52 (1.1%)
Ik werk als zelfstandige, zelfstandig helper of als ondernemer	246 (8.8%)	86 (11%)	23 (11%)	120 (7.0%)	397 (8.7%)
Ik werk deeltijds in loondienst	405 (14%)	102 (13%)	27 (12%)	250 (15%)	638 (14%)
Ik werk voltijds in loondienst	1,100 (39%)	300 (37%)	75 (34%)	746 (43%)	1,935 (42%)
¹ n (%)
² Pearson’s Chi-squared test

Taking actions due to noise annoyance

GGB %>%
  tabyl(q2_7_5) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

q2_7_5	n	percent	valid_percent
1	2,630	0.25942	1
	7,508	0.74058
Total	10,138	1.00000	1

Nature sounds

nature <- c("nature_birds", "nature_poultry", "nature_crickets", "nature_frogs", "nature_trees", "nature_water", "nature_animals", "nature_other", "nature_none")

# Recode 0 to "No" and 1 to "Yes" for nature variables
GGB[nature] <- lapply(GGB[nature], function(x) {
  factor(case_when(
    x == 1 ~ "Yes",
    x == 0 ~ "No",
    TRUE ~ "Unknown"
  ), levels = c("Unknown", "No", "Yes"))
})


# Prepare data for plotting
nature_data <- GGB[nature] %>%
  mutate_all(as.factor) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
  group_by(Variable, Response) %>%
  dplyr::summarise(n = n(), .groups = "drop") %>%
  group_by(Variable) %>%
  mutate(percent = n / sum(n) * 100)

# Plot 100% stacked bar chart
ggplot(nature_data, aes(x = Variable, y = percent, fill = Response)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(label = ifelse(Response == "Yes", paste0(round(percent, 1), "%"), "")),
            position = position_stack(vjust = 1),
            color = "white", size = 3.5, fontface = "bold") +
  scale_fill_manual(values = c("Unknow" = "#FDE0DF", "No" = "#F67D79", "Yes" = "#B21F1A")) +
  labs(title = "Bar Chart of Nature Variables",
       x = NULL, y = "Percentage", fill = "Response") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Awareness about own produced noise

GGB %>%
  tabyl(bewustzijn_eigen_geluid_cat) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

bewustzijn_eigen_geluid_cat	n	percent
Always	3,648	0.35983429
Often	4,494	0.44328270
Sometimes	1,378	0.13592425
Rarely	468	0.04616295
Never	150	0.01479582
Total	10,138	1.00000000

III. Participants’ homes

Bedroom floor

GGB %>%
  tabyl(floor_bedroom) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

floor_bedroom	n	percent
Ondergronds	11	0.001085027
Gelijkvloers	1,128	0.111264549
1ste verdieping	6,168	0.608404024
2de verdieping	1,702	0.167883212
3de verdieping	453	0.044683370
4de verdieping	187	0.018445453
5de verdieping	103	0.010159795
6de verdieping	49	0.004833300
7de verdieping	41	0.004044190
8de verdieping	0	0.000000000
Hoger dan 8ste verdieping	75	0.007397909
Unknown	221	0.021799171
Total	10,138	1.000000000

Bedroom window

GGB %>%
  tabyl(window_street) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

window_street	n	percent
Yes	4,623	0.456007102
No	5,430	0.535608601
Unknown	85	0.008384297
Total	10,138	1.000000000

Bedroom window at night

GGB %>%
  tabyl(window_night_num) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

window_night_num	n	percent
Always closed	3,122	0.3079503
Mostly closed	3,444	0.3397120
Mostly open	2,433	0.2399882
Always open	1,139	0.1123496
Total	10,138	1.0000000

Environmental noise affect choice to close windows or window grilles at night

GGB %>%
  tabyl(q8_7) %>%
  adorn_totals() %>%
  setNames(c("Noise influence on closing windows at night", "n", "percent")) %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

Noise influence on closing windows at night	n	percent
Yes, this is the main reason	5,477	0.5402446
Yes, but due to other reasons	1,980	0.1953048
No	2,681	0.2644506
Total	10,138	1.0000000

IV. Participants’ sensitivity to sound

Distribution of Hyperacusis

hist(GGB$total_hyperacusis,
     col = "#F67D79",
     main = "Distribution of Hyperacusis",
     xlab = "Total of Hyperacusis")

Distribution of Hyperacusis

hist(GGB$wns_sum_score,
     col = "#F67D79",
     main = "Distribution of WNS",
     xlab = "Total score of WNS")

V. Sleep quality

Sleep Duration (Day vs. Night) _ Barchart

table(is.na(GGB$sleep_duration_day))

## 
## FALSE 
## 10138

# Reshape the data to long format
sleep_data <- GGB %>%
  select(sleep_duration_day, sleep_duration_night) %>%
  pivot_longer(cols = everything(), names_to = "Period", values_to = "Duration") %>%
  group_by(Period, Duration) %>%
  dplyr::summarise(n = n(), .groups = "drop") %>%
  group_by(Period) %>%
  mutate(percent = n / sum(n) * 100)

# Plot
ggplot(sleep_data, aes(x = Period, y = percent, fill = Duration)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(label = ifelse(percent > 5, paste0(round(percent, 1), "%"), "")),
            position = position_stack(vjust = 0.5),
            size = 3, color = "white") +
   coord_flip() +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#ffa9a9", "#F67D79", "#F72C26", "#B21F1A","#7E2811","#780E00"  )) +
  labs(title = "Distribution of Sleep Duration (Day vs. Night)",
       x = NULL, y = "Percentage", fill = "Sleep Duration") +
  theme_minimal(base_size = 13)

Sleep Duration (Day vs. Night)_Table

# Define all possible 8 categories
categories <- sort(unique(na.omit(c(GGB$sleep_duration_day, GGB$sleep_duration_night))))

# Calculate frequencies for each variable
day_tab <- GGB %>%
  tabyl(sleep_duration_day) %>%
  adorn_totals("row") %>%
  dplyr::rename(category = sleep_duration_day,
         n_day = n, 
         percent_day = percent)

night_tab <- GGB %>%
  tabyl(sleep_duration_night) %>%
  adorn_totals("row") %>%
  dplyr::rename(category = sleep_duration_night,
         n_night = n, percent_night = percent)

# Merge into one table
full_join(day_tab, night_tab, by = "category") %>%
  filter(category != "Total") %>%   # remove total if not needed
  mutate(
    percent_day = round(percent_day * 100, 2),
    percent_night = round(percent_night * 100, 2)) %>% 
  flextable() %>%
  theme_vanilla() %>%
  autofit()

category	n_day	percent_day	n_night	percent_night
0-2 uur	2,283	22.52	10	0.10
10-12 uur	5	0.05	55	0.54
2-4 uur	88	0.87	81	0.80
4-6 uur	56	0.55	1,412	13.93
6-8 uur	211	2.08	6,820	67.27
8-10 uur	65	0.64	1,744	17.20
>12 uur	1	0.01	6	0.06
Niet	7,429	73.28	10	0.10

Time Taken to Fall Asleep

table(GGB$fall_asleep)

## 
##   >120 min   0-15 min  16-30 min  31-45 min  46-60 min 61-120 min 
##         76       4177       3445       1429        669        342

ggplot(GGB, aes(x = "Fall asleep", fill = fall_asleep)) +
  geom_bar(position = "fill", width = 0.5) +
  geom_text(stat = "count", aes(label = paste0(round((..count..) / sum(..count..) * 100, 1), "%")),
            position = position_fill(vjust = 0.5), color = "white", size = 4) +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#ffa9a9", "#F67D79", "#F72C26", "#B21F1A","#7E2811","#780E00"  )) +
  labs(x = NULL, y = "Percentage", fill = "Time to fall asleep",
       title = "Time Taken to Fall Asleep") +
  theme_minimal(base_size = 14)

## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Awaken by disturbed

sleep_disturb <- c("sleep_disturb_road", "sleep_disturb_train", "sleep_disturb_tram", "sleep_disturb_air", "sleep_disturb_neighbor")

# Recode 
GGB[sleep_disturb] <- lapply(GGB[sleep_disturb], function(x) {
  factor(case_when(
    x == 1 ~ "Nooit",
    x == 2 ~ "Minder dan jaarlijks ",
    x == 3 ~ "Een of meerdere keren per jaar, maar niet maandelijks ",
    x == 4 ~ "Een of meerdere keren per maand, maar niet wekelijks ",
    x == 5 ~ "Een of meerdere keren per week, maar niet dagelijks ",
    x == 6 ~ "Elke nacht",
    TRUE ~ "Unknown"
  ), levels = c("Unknown", "Nooit", "Minder dan jaarlijks ", "Een of meerdere keren per week, maar niet dagelijks ","Een of meerdere keren per maand, maar niet wekelijks ", "Een of meerdere keren per jaar, maar niet maandelijks ", "Elke nacht"))
})

# Prepare data for plotting
sleep_disturb_data <- GGB[sleep_disturb] %>%
  mutate_all(as.factor) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
  group_by(Variable, Response) %>%
  dplyr::summarise(n = n(), .groups = "drop") %>%
  group_by(Variable) %>%
  mutate(percent = n / sum(n) * 100)

# Plot 100% stacked bar chart
ggplot(sleep_disturb_data, aes(x = Variable, y = percent, fill = Response)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(label = ifelse(Response == "Yes", paste0(round(percent, 1), "%"), "")),
            position = position_stack(vjust = 1),
            color = "white", size = 3.5, fontface = "bold") +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A","#7E2811","#780E00"  )) +
  labs(title = "Bar Chart of Nature Variables",
       x = NULL, y = "Percentage", fill = "Response") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Snoring –> couldn’t find the variable

ESS

Gender

ggplot(GGB, aes(x = gender, y = total_ess, fill = gender)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format", 
                     label.x = 4.45,
                     label.y = 5.5) + # Add test result
   scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
  labs(
    title = "Distribution of Total ESS by Gender",
    x = "Gender",
    y = "Total ESS Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Based on the one-way ANOVA (p = 0.99), there is no evidence of a statistically significant difference in total ESS scores across gender group

Age group

ggplot(GGB, aes(x = age_blv, y = total_ess, fill = age_blv)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 4.45,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
  labs(
    title = "Distribution of Total ESS by Age Group",
    x = "Age group",
    y = "Total ESS Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total ESS scores across age group

Income

ggplot(GGB %>% filter(!is.na(income_binary)), aes(x = income_binary, y = total_ess, fill = income_binary)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "wilcox.test", label = "p.format",
                     label.x = 2.45,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c( "#F67D79", "#780E00"))+
  labs(
    title = "Distribution of Total ESS by Income",
    x = "Income",
    y = "Total ESS Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Based on the Wilcoxon test (p = 0.72), there is no evidence of a statistically significant difference in total ESS scores across income group

Dwelling Type

ggplot(GGB, aes(x = dwelling_type, y = total_ess, fill = dwelling_type)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 6.40,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A","#7E2811" ))+
  labs(
    title = "Distribution of Total ESS by Dwelling Type",
    x = "Dwelling Type",
    y = "Total ESS Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Based on the anova test (p = 0.35), there is no evidence of a statistically significant difference in total ESS scores across dwelling type.

Employment

table(GGB$employment)

## 
##                                                         Anders 
##                                                            389 
##                Ik ben arbeidsongeschikt (ziekte, invaliditeit) 
##                                                            536 
##  Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen) 
##                                                           2307 
##                                    Ik ben huisvrouw of huisman 
##                                                            126 
##                                     Ik ben scholier of student 
##                                                            203 
##                                             Ik ben werkzoekend 
##                                                            127 
## Ik werk als zelfstandige, zelfstandig helper of als ondernemer 
##                                                            872 
##                                Ik werk deeltijds in loondienst 
##                                                           1422 
##                                 Ik werk voltijds in loondienst 
##                                                           4156

ggplot(GGB, aes(x = employment, y = total_ess, fill = employment)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 9.35,
                     label.y = 4.5) + # Add test result
  labs(
    title = "Distribution of Total ESS by Employment",
    x = "Employment",
    y = "Total ESS Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

FAS

Gender

ggplot(GGB, aes(x = gender, y = total_fas, fill = gender)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format", 
                     label.x = 4.45,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
  labs(
    title = "Distribution of Total fas by Gender",
    x = "Gender",
    y = "Total fas Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total fas scores across gender group

Age group

ggplot(GGB, aes(x = age_blv, y = total_fas, fill = age_blv)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 4.45,
                     label.y = 7.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
  labs(
    title = "Distribution of Total fas by Age Group",
    x = "Age group",
    y = "Total fas Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total fas scores across age group

Income

ggplot(GGB %>% filter(!is.na(income_binary)), aes(x = income_binary, y = total_fas, fill = income_binary)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "t.test", label = "p.format",
                     label.x = 2.45,
                     label.y = 7.5) + # Add test result
  scale_fill_manual(values = c( "#F67D79", "#780E00"))+
  labs(
    title = "Distribution of Total fas by Income",
    x = "Income",
    y = "Total fas Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Based on the Wilcoxon test (p < 0.001), there is a statistically significant difference in total fas scores across income group

Dwelling Type

ggplot(GGB, aes(x = dwelling_type, y = total_fas, fill = dwelling_type)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 6.40,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A","#7E2811" ))+
  labs(
    title = "Distribution of Total fas by Dwelling Type",
    x = "Dwelling Type",
    y = "Total fas Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total fas scores across dwelling type.

Employment

table(GGB$employment)

## 
##                                                         Anders 
##                                                            389 
##                Ik ben arbeidsongeschikt (ziekte, invaliditeit) 
##                                                            536 
##  Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen) 
##                                                           2307 
##                                    Ik ben huisvrouw of huisman 
##                                                            126 
##                                     Ik ben scholier of student 
##                                                            203 
##                                             Ik ben werkzoekend 
##                                                            127 
## Ik werk als zelfstandige, zelfstandig helper of als ondernemer 
##                                                            872 
##                                Ik werk deeltijds in loondienst 
##                                                           1422 
##                                 Ik werk voltijds in loondienst 
##                                                           4156

ggplot(GGB, aes(x = employment, y = total_fas, fill = employment)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 9.35,
                     label.y = 4.5) + # Add test result
  labs(
    title = "Distribution of Total fas by Employment",
    x = "Employment",
    y = "Total fas Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

VI. Overall health

stress

Gender

ggplot(GGB, aes(x = gender, y = pss_total, fill = gender)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format", 
                     label.x = 4.45,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
  labs(
    title = "Distribution of Total stress by Gender",
    x = "Gender",
    y = "Total stress Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total stress scores across gender group

Age group

ggplot(GGB, aes(x = age_blv, y = pss_total, fill = age_blv)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 4.45,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
  labs(
    title = "Distribution of Total stress by Age Group",
    x = "Age group",
    y = "Total stress Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total stress scores across age group

Income

ggplot(GGB %>% filter(!is.na(income_binary)), aes(x = income_binary, y = pss_total, fill = income_binary)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "wilcox.test", label = "p.format",
                     label.x = 2.45,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c( "#F67D79", "#780E00"))+
  labs(
    title = "Distribution of Total stress by Income",
    x = "Income",
    y = "Total stress Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Based on the Wilcoxon (p < 0.001), there is a statistically significant difference in total stress scores across income group

Dwelling Type

ggplot(GGB, aes(x = dwelling_type, y = pss_total, fill = dwelling_type)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 6.40,
                     label.y = 5.5) + # Add test result
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A","#7E2811" ))+
  labs(
    title = "Distribution of Total stress by Dwelling Type",
    x = "Dwelling Type",
    y = "Total stress Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Based on the anova test (p < 0.001), there is a statistically significant difference in total stress scores across dwelling type.

Employment

table(GGB$employment)

## 
##                                                         Anders 
##                                                            389 
##                Ik ben arbeidsongeschikt (ziekte, invaliditeit) 
##                                                            536 
##  Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen) 
##                                                           2307 
##                                    Ik ben huisvrouw of huisman 
##                                                            126 
##                                     Ik ben scholier of student 
##                                                            203 
##                                             Ik ben werkzoekend 
##                                                            127 
## Ik werk als zelfstandige, zelfstandig helper of als ondernemer 
##                                                            872 
##                                Ik werk deeltijds in loondienst 
##                                                           1422 
##                                 Ik werk voltijds in loondienst 
##                                                           4156

ggplot(GGB, aes(x = employment, y = pss_total, fill = employment)) +
  geom_violin(trim = FALSE, alpha = 0.4) +          # Violin plot
  geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) +  # Boxplot inside violin
  coord_flip()+
  stat_compare_means(method = "anova", label = "p.format",
                     label.x = 9.35,
                     label.y = 4.5) + # Add test result
  labs(
    title = "Distribution of Total stress by Employment",
    x = "Employment",
    y = "Total stress Score"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

General physical health status

GGB %>%
  tabyl(q4_2) %>%
  adorn_totals() %>%
  setNames(c("General physical health status", "n", "percent")) %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

General physical health status	n	percent
Heel goed	1,471	0.145097652
Eerder goed	4,767	0.470211087
Gemiddeld	3,033	0.299171434
Eerder slecht	810	0.079897416
Heel slecht	57	0.005622411
Total	10,138	1.000000000

Physical complaints

physical_complaints <- c("q4_3_1", "q4_3_2", "q4_3_3", "q4_3_4", "q4_3_5", "q4_3_6", "q4_3_7", "q4_3_8")

# Recode 
GGB[physical_complaints] <- lapply(GGB[physical_complaints], function(x) {
  factor(case_when(
    x == 1 ~ "Nooit",
    x == 2 ~ "Een paar keer per jaar",
    x == 3 ~ "Een paar keer per maand",
    x == 4 ~ "Een paar keer per week",
    x == 5 ~ "Dagelijks"
  ), levels = c("Nooit", "Een paar keer per jaar", "Een paar keer per maand","Een paar keer per week", "Dagelijks"))
})

# Prepare data for plotting
physical_complaints_data <- GGB[physical_complaints] %>%
  mutate_all(as.factor) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
  group_by(Variable, Response) %>%
  dplyr::summarise(n = n(), .groups = "drop") %>%
  group_by(Variable) %>%
  mutate(percent = n / sum(n) * 100)

# Relabel variable names
physical_complaints_data <- physical_complaints_data %>%
  mutate(Variable = revalue(Variable,
    c("q4_3_1" = "Hoofdpijn",
    "q4_3_2" = "Duizeligheid",
    "q4_3_3" = "Hartkloppingen",
    "q4_3_4" = "Maag- en darmklachten",
    "q4_3_5" = "Slaperigheid",
    "q4_3_6" = "Vermoeidheid",
    "q4_3_7" = "Slapeloosheid",
    "q4_3_8" = "Psychische stoornissen"
  )))

## The following `from` values were not present in `x`: q4_3_2, q4_3_3, q4_3_4, q4_3_5, q4_3_6, q4_3_7, q4_3_8

## The following `from` values were not present in `x`: q4_3_1, q4_3_3, q4_3_4, q4_3_5, q4_3_6, q4_3_7, q4_3_8

## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_4, q4_3_5, q4_3_6, q4_3_7, q4_3_8

## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_5, q4_3_6, q4_3_7, q4_3_8

## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_4, q4_3_6, q4_3_7, q4_3_8

## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_4, q4_3_5, q4_3_7, q4_3_8

## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_4, q4_3_5, q4_3_6, q4_3_8

## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_4, q4_3_5, q4_3_6, q4_3_7

# Plot 100% stacked bar chart
ggplot(physical_complaints_data, aes(x = Variable, y = percent, fill = Response)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(label = ifelse(Response == "Yes", paste0(round(percent, 1), "%"), "")),
            position = position_stack(vjust = 1),
            color = "white", size = 3.5, fontface = "bold") +
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F72C26", "#B21F1A","#7E2811"  )) +
  labs(title = "Bar Chart of Physical complaints",
       x = NULL, y = "Percentage", fill = "Response") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Chornic condition

freq <- table(GGB$q4_4)
pct <- round(100 * freq / sum(freq), 1)
labels <- paste0(names(freq), ": ", freq, " (", pct, "%)")
pie(freq, labels = labels, main = "Pie chart of chronic condition", col= c("#FDE0DF", "#F72C26"))

Chronic disorders

chronic_disorders <- c("q4_5_1", "q4_5_2", "q4_5_3", "q4_5_4", "q4_5_5", "q4_5_6", "q4_5_7")

# Prepare data for plotting
chronic_disorders_data <-  GGB %>%
  select(all_of(chronic_disorders)) %>%
  pivot_longer(cols = everything(), names_to = "variable", values_to = "value") %>%  
  filter(value == 1) 

summary_chronic_disorders <- aggregate(value == 1 ~ variable, data = chronic_disorders_data , sum)


# Relabel variable names
names(summary_chronic_disorders)[2] <- "frequency"
summary_chronic_disorders <- summary_chronic_disorders %>%
  mutate(variable = revalue(variable,
    c("q4_5_1" = "Aandoeningen van het ademhalingsstelsel 
      (bijvoorbeeld astma, COPD)",
    "q4_5_2" = "Aandoeningen van het spijsverteringsstelsel
    (bijvoorbeeld ziekte van Crohn, levercirrose)",
    "q4_5_3" = "Hart- en vaataandoeningen 
    (bijvoorbeeld hoge bloeddruk, hartritmestoornissen)",
    "q4_5_4" = "Neurologische en psychische aandoeningen 
    (bijvoorbeeld epilepsie, multiple sclerose)",
    "q4_5_5" = "Metabole aandoeningen 
    (bijvoorbeeld diabetes, obesitas)",
    "q4_5_6" = "Andere",
    "q4_5_7" = "Slaapapneu"
  ))) %>% 
  mutate(variable = reorder(variable, frequency))


# Plot bar chart
ggplot(summary_chronic_disorders, aes(x = variable, y = frequency)) +
  geom_bar(stat = "identity", width = 0.7, fill = "red") +
  geom_text(aes(label = frequency), hjust = -0.1, size = 4) +
  coord_flip()+
  labs(title = "Bar Chart of Chronic disorders",
       x = NULL, y = "Frequency") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
         plot.title = element_text(hjust = 1))+ 
  expand_limits(y = max(summary_chronic_disorders$frequency) * 1.1)

Allergic condition

freq_4_6 <- table(GGB$q4_6)
pct_4_6 <- round(100 * freq_4_6 / sum(freq_4_6), 1)
labels_4_6 <- paste0(names(freq_4_6), ": ", freq_4_6, " (", pct_4_6, "%)")
pie(freq_4_6, labels = labels_4_6, main = "Pie chart of allergic condition", col= c("#FDE0DF", "#F72C26"))

Allergic disorders

ggplot(GGB %>% filter(!is.na(q4_7)), aes(x=q4_7)) +
  geom_bar(width = 0.7, fill = "red")+
  coord_flip()+
  labs(title = "Bar graph of allergic disorders",
    x = "Allergic disorders")+
  theme(axis.text.x = element_text(hjust = 1),
         plot.title = element_text(hjust = 1))

Respiratory infections

ggplot(data = (GGB %>%
         filter(!is.na(q4_8)) %>%
         filter(q4_8 >= quantile(q4_8, 0.25) - 1.5 * IQR(q4_8),
                q4_8 <= quantile(q4_8, 0.75) + 1.5 * IQR(q4_8))),
       aes(x = q4_8)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black") +
  labs(title = "Histogram of Respiratory infections",
       x = "Respiratory infections (times)", y = "Count") +
  theme_minimal()

Smoking

GGB %>%
    mutate(q4_9 = case_when(
    q4_9 == 1 ~ "Yes", 
    q4_9 == 2 ~ "No"
  )) %>% 
  tabyl(q4_9) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

q4_9	n	percent
No	9,287	0.91605839
Yes	851	0.08394161
Total	10,138	1.00000000

Moderate to intense exercise

ggplot(data = (GGB %>%
         filter(!is.na(q4_13)) %>%
         filter(q4_13 >= quantile(q4_13, 0.25) - 1.5 * IQR(q4_13),
                q4_13 <= quantile(q4_13, 0.75) + 1.5 * IQR(q4_13))),
       aes(x = q4_13)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black") +
  labs(title = "Histogram of Moderate to intense exercise",
       x = "Moderate to intense exercise (hours)", y = "Count") +
  theme_minimal()

Time in outdoor environment

ggplot(data = (GGB %>%
         filter(!is.na(q4_14)) %>%
         filter(q4_14 >= quantile(q4_14, 0.25) - 1.5 * IQR(q4_14),
                q4_14 <= quantile(q4_14, 0.75) + 1.5 * IQR(q4_14))),
       aes(x = q4_14)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black") +
  labs(title = "Histogram of Time in outdoor environment ",
       x = "Time in outdoor environment (hours)", y = "Count") +
  theme_minimal()

Time in using smartphone

ggplot(data = (GGB %>%
         filter(!is.na(smartphone_use)) %>%
         filter(smartphone_use >= quantile(smartphone_use, 0.25) - 1.5 * IQR(smartphone_use),
                smartphone_use <= quantile(smartphone_use, 0.75) + 1.5 * IQR(smartphone_use))),
       aes(x = smartphone_use)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black") +
  labs(title = "Histogram of Time in using smartphone",
       x = "Time in using smartphone (hours)", y = "Count") +
  theme_minimal()

Driving frequency

GGB %>%
  tabyl(q4_16) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

q4_16	n	percent
Nooit (al dan niet in het bezit van een rijbewijs)	1,016	0.10021701
Minder dan jaarlijks	122	0.01203393
Een of meerdere keren per jaar, maar niet maandelijks	490	0.04833300
Een of meerdere keren per maand, maar niet wekelijks	1,549	0.15279148
Een of meerdere keren per week, maar niet dagelijks	4,421	0.43608207
Dagelijks	2,540	0.25054251
Total	10,138	1.00000000

VII. Participants’ hearing status

Hearing status

# Prepare data for plotting
hearing_status_data <- GGB %>%
  select(c("q7_2_1", "q7_2_2")) %>%
  mutate_all(as.factor) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
  group_by(Variable, Response) %>%
  dplyr::summarise(n = n(), .groups = "drop") %>%
  group_by(Variable) %>%
  mutate(percent = n / sum(n) * 100) %>% 
  mutate(Variable = case_when(
    Variable == "q7_2_1" ~ "Right ear",
    Variable == "q7_2_2" ~ "Left ear"
  ))

# Plot 100% stacked bar chart
ggplot(hearing_status_data, aes(x = Variable, y = percent, fill = Response)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_text(aes(label = ifelse(Response == "Yes", paste0(round(percent, 1), "%"), "")),
            position = position_stack(vjust = 1),
            color = "white", size = 3.5, fontface = "bold") +
  coord_flip()+
  scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F72C26", "#7E2811"  )) +
  labs(title = "Bar Chart of Hearing Status",
       x = NULL, y = "Percentage", fill = "Response") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(hjust = 1),
        legend.position = "bottom")+
  guides(fill = guide_legend(nrow = 4))

Hearing problem

GGB %>%
  tabyl(q7_3) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

q7_3	n	percent
Ik heb nooit gehoorproblemen	6,520	0.6431249
Ik heb een (licht) gehoorprobleem of twijfel of ik er een heb	3,618	0.3568751
Total	10,138	1.0000000

Tinnitus

GGB %>%
  tabyl(q7_5) %>%
  adorn_totals() %>%
  flextable() %>%
  theme_vanilla() %>%
  autofit()

q7_5	n	percent
Ja	2,660	0.26237917
Ik weet het niet	640	0.06312882
Nee	6,838	0.67449201
Total	10,138	1.00000000

GGB_analyses

2025-07-30

Before analyses

I. Sociodemographics

Number of participant

Participants per age and gender

Family composition

Nationality

Educational level

Work situation

Income

Type of housing

Member of association

II. Participants’ quality of life and noise perception

Overall satisfaction

Annoyance from environmental noise

Change in noise annoyance

Taking actions due to noise annoyance

Nature sounds

Awareness about own produced noise

III. Participants’ homes

Bedroom floor

Bedroom window

Bedroom window at night

Environmental noise affect choice to close windows or window grilles at night

IV. Participants’ sensitivity to sound

Distribution of Hyperacusis

Distribution of Hyperacusis

V. Sleep quality

Sleep Duration (Day vs. Night) _ Barchart

Sleep Duration (Day vs. Night)_Table

Time Taken to Fall Asleep

Awaken by disturbed

Snoring –> couldn’t find the variable

ESS

Gender

Age group

Income

Dwelling Type

Employment

FAS

Gender

Age group

Income

Dwelling Type

Employment

VI. Overall health

stress

Gender

Age group

Income

Dwelling Type

Employment

General physical health status

Physical complaints

Chornic condition

Chronic disorders

Allergic condition

Allergic disorders

Respiratory infections

Smoking

Moderate to intense exercise

Time in outdoor environment

Time in using smartphone

Driving frequency

VII. Participants’ hearing status

Hearing status

Hearing problem

Tinnitus