library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## âś” forcats 1.0.0 âś” readr 2.1.5
## âś” ggplot2 3.5.2 âś” stringr 1.5.1
## âś” lubridate 1.9.4 âś” tibble 3.2.1
## âś” purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## âś– dplyr::filter() masks stats::filter()
## âś– dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
## Warning: package 'janitor' was built under R version 4.4.3
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(flextable)
## Warning: package 'flextable' was built under R version 4.4.3
##
## Attaching package: 'flextable'
##
## The following object is masked from 'package:purrr':
##
## compose
library(rlang)
## Warning: package 'rlang' was built under R version 4.4.3
##
## Attaching package: 'rlang'
##
## The following objects are masked from 'package:purrr':
##
## %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
## flatten_raw, invoke, splice
library(stringr)
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.4.3
##
## Attaching package: 'gtsummary'
##
## The following object is masked from 'package:flextable':
##
## continuous_summary
library(rstatix)
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:janitor':
##
## make_clean_names
##
## The following object is masked from 'package:stats':
##
## filter
library(ggplot2)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:rstatix':
##
## desc, mutate
##
## The following object is masked from 'package:gtsummary':
##
## mutate
##
## The following object is masked from 'package:purrr':
##
## compact
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(ggpubr)
##
## Attaching package: 'ggpubr'
##
## The following object is masked from 'package:plyr':
##
## mutate
##
## The following objects are masked from 'package:flextable':
##
## border, font, rotate
#data <- read.csv("2025-07-10_GGB_Merged_PERSONALINFO_NewGGBParticipants_WITHGPS.csv")
GGB <- read.csv("C:/Users/thuyb/OneDrive/Desktop/Student Job/Dataset/2025-07-10_GGB_Merged_PERSONALINFO_NewGGBParticipants_WITHGPS.csv")
#labels <- read.csv("/Users/forllychavez/Library/Mobile Documents/com~apple~CloudDocs/Desktop/Escritorio - MacBook Air de Forlly/Master of Epidemiology/2nd Year/Student Job 2.1/Documents to read/Analyses GGB Forlly Romero/Dataset/GGB labels.csv")
GGB <- GGB %>%
clean_names()
#Age categories based on blv
GGB<- GGB %>%
mutate(
age_blv = case_when(age %in% c(17:30) ~ '17-30',
age %in% c(31:45) ~ '31-45',
age %in% c(46:60) ~ '46-60',
age %in% c(61:92) ~ '61+'
) %>% factor(levels = c('17-30', '31-45', '46-60','61+' )))
#Gender
GGB <- GGB %>%
mutate(gender = factor(gender,
levels = c("Man",
"Vrouw",
"Niet-binaire geslachtsidentiteit",
"Ik antwoord liever niet"),
ordered = TRUE))
# Income
GGB <- GGB %>%
mutate(
income = case_when(
income %in% c("0 – 1.000 € per maand",
"1.001 – 2.000 € per maand",
"2.001 – 3.000 € per maand",
"3.001 – 4.000 € per maand",
"4.001 – 5.000 € per maand",
"5.001 – 6.000 € per maand",
"meer dan 6.000 € per maand",
"Ik antwoord liever niet") ~ income,
TRUE ~ "Unknown" # Everything else becomes "Unknown"
),
income = factor(income,
levels = c("0 – 1.000 € per maand",
"1.001 – 2.000 € per maand",
"2.001 – 3.000 € per maand",
"3.001 – 4.000 € per maand",
"4.001 – 5.000 € per maand",
"5.001 – 6.000 € per maand",
"meer dan 6.000 € per maand",
"Ik antwoord liever niet",
"Unknown"),
ordered = TRUE)
)
# Income binary
GGB <- GGB %>%
mutate(income_binary = case_when(
income %in% c("0 – 1.000 € per maand",
"1.001 – 2.000 € per maand",
"2.001 – 3.000 € per maand") ~ "≤3000",
income %in% c("3.001 – 4.000 € per maand",
"4.001 – 5.000 € per maand",
"5.001 – 6.000 € per maand",
"meer dan 6.000 € per maand") ~ ">3000",
income == "Ik antwoord liever niet" ~ NA_character_,
TRUE ~ NA_character_
))
# Education
GGB <- GGB %>%
mutate(education = factor(education,
levels = c("Geen",
"Lager onderwijs",
"Lager secundair onderwijs (getuigschrift 4de jaar)",
"Hoger secundair onderwijs",
"Niet-universitair hoger onderwijs / bachelor",
"Universitair onderwijs / master / doctoraat",
"Ik antwoord liever niet / Geen antwoord"),
ordered = TRUE))
# Nationality
GGB <- GGB %>%
mutate(nationality_belgian = as.factor(nationality_belgian)) %>%
mutate(nationality = case_when(
nationality_belgian == 1 ~ "Belgian National",
is.na(nationality_belgian) ~ "Not Belgian National",
TRUE ~"Not Belgian National"
))
# Noise annoyance
GGB <- GGB %>%
mutate(
noise_annoyance_cat = case_when(
noise_annoyance == 1 ~ 'Helemaal niet gehinderd',
noise_annoyance == 2 ~ 'Niet gehinderd',
noise_annoyance == 3 ~ 'Tamelijk gehinderd',
noise_annoyance == 4 ~ 'Ernstig gehinderd',
noise_annoyance == 5 ~ 'Extreem gehinderd'
))
# Awareness about own produced noise
GGB <- GGB %>%
mutate(
bewustzijn_eigen_geluid_cat = factor(case_when(
bewustzijn_eigen_geluid == 1 ~ 'Always',
bewustzijn_eigen_geluid == 2 ~ 'Often',
bewustzijn_eigen_geluid == 3 ~ 'Sometimes',
bewustzijn_eigen_geluid == 4 ~ 'Rarely',
bewustzijn_eigen_geluid == 5 ~ 'Never'),
levels = c('Always', 'Often', 'Sometimes', 'Rarely', 'Never')))
# QUality of life
GGB <- GGB %>%
mutate(
quality_life = case_when(
gen_satisfaction == 1 ~ 'Zeer tevreden',
gen_satisfaction == 2 ~ 'Tevreden',
gen_satisfaction == 3 ~ 'Noch tevreden, noch ontevreden',
gen_satisfaction == 4 ~ 'Niet tevreden',
gen_satisfaction == 5 ~ 'Helemaal niet tevreden'
))
# Bedroom floor
GGB <- GGB %>%
mutate(
floor_bedroom = case_when(
floor_bedroom %in% c("Ondergronds",
"Gelijkvloers",
"1ste verdieping",
"2de verdieping",
"3de verdieping",
"4de verdieping",
"5de verdieping",
"6de verdieping",
"7de verdieping",
"8de verdieping",
"Hoger dan 8ste verdieping") ~ floor_bedroom,
TRUE ~ "Unknown" # Everything else becomes "Unknown"
),
floor_bedroom = factor(floor_bedroom,
levels = c("Ondergronds",
"Gelijkvloers",
"1ste verdieping",
"2de verdieping",
"3de verdieping",
"4de verdieping",
"5de verdieping",
"6de verdieping",
"7de verdieping",
"8de verdieping",
"Hoger dan 8ste verdieping",
"Unknown"),
ordered = TRUE)
)
# Window street
GGB <- GGB %>%
mutate(
window_street = factor(case_when(
window_street == 1 ~ "Yes",
window_street == 0 ~ "No",
TRUE ~ "Unknown"),
levels = c("Yes", "No", "Unknown")))
# Bedroom windows at night
GGB <- GGB %>%
mutate(
window_night_num = factor(case_when(
window_night_num == 5 ~ "Always closed",
window_night_num == 6 ~ "Mostly closed",
window_night_num == 7 ~ "Mostly open",
window_night_num == 8 ~ "Always open"),
levels = c("Always closed", "Mostly closed", "Mostly open", "Always open")))
# Environmental noise affect your choice to close windows or window grilles at night
GGB <- GGB %>%
mutate(
q8_7 = factor(case_when(
q8_7 == 5 ~ "Yes, this is the main reason",
q8_7 == 6 ~ "Yes, but due to other reasons",
q8_7 == 7 ~ "No"),
levels = c("Yes, this is the main reason", "Yes, but due to other reasons", "No")))
# General physical health status
GGB <- GGB %>%
mutate(
q4_2 = factor(case_when(
q4_2 == 1 ~ "Heel goed",
q4_2 == 2 ~ "Eerder goed",
q4_2 == 3 ~ "Gemiddeld",
q4_2 == 4 ~ "Eerder slecht",
q4_2 == 5 ~ "Heel slecht"),
levels = c("Heel goed", "Eerder goed", "Gemiddeld", "Eerder slecht", "Heel slecht")))
# Chronic disorders
GGB <- GGB %>%
mutate(q4_4 = case_when(
q4_4 == 1 ~ "Yes",
q4_4 == 2 ~ "No"
))
# Allergic disorders
GGB <- GGB %>%
mutate(q4_6 = case_when(
q4_6 == 1 ~ "Yes",
q4_6 == 2 ~ "No"
))
GGB <- GGB %>%
mutate(q4_7 = case_when(
q4_7 == 1 ~ "Allergische astma",
q4_7 == 2 ~ "Inhalatieallergie
(hooikoorts/pollen, huisstofmijt, dieren)",
q4_7 == 3 ~ "Voedselallergie",
q4_7 == 4 ~ "Eczeem, atopische dermatitis",
q4_7 == 5 ~ "Andere"
))
# Driving frequency
GGB <-GGB %>%
mutate(q4_16 = case_when(
q4_16 == 1 ~ "Nooit (al dan niet in het bezit van een rijbewijs)",
q4_16 == 2 ~ "Minder dan jaarlijks",
q4_16 == 3 ~ "Een of meerdere keren per jaar, maar niet maandelijks",
q4_16 == 4 ~ "Een of meerdere keren per maand, maar niet wekelijks",
q4_16 == 5 ~ "Een of meerdere keren per week, maar niet dagelijks",
q4_16 == 6 ~ "Dagelijks"),
q4_16 = factor(q4_16,
level = c("Nooit (al dan niet in het bezit van een rijbewijs)", "Minder dan jaarlijks", "Een of meerdere keren per jaar, maar niet maandelijks", "Een of meerdere keren per maand, maar niet wekelijks", "Een of meerdere keren per week, maar niet dagelijks", "Dagelijks" )))
# Participants' hearing status
GGB <-GGB %>% # right ear
mutate(q7_2_1 = factor(case_when(
q7_2_1 == 1 ~ "Normaal gehoor",
q7_2_1 == 2 ~ "Verminderd of geen gehoor, maar zonder gebruik van hoortoestel/implantaat",
q7_2_1 == 3 ~ "Met hoortoestel/ gehoorapparaat",
q7_2_1 == 4 ~ "Met implantaat (vb. cochleair implantaat, middenoorimplantaat)"),
level = c("Normaal gehoor", "Verminderd of geen gehoor, maar zonder gebruik van hoortoestel/implantaat", "Met hoortoestel/ gehoorapparaat", "Met implantaat (vb. cochleair implantaat, middenoorimplantaat)" )))
GGB <-GGB %>% # left ear
mutate(q7_2_2 = factor(case_when(
q7_2_2 == 1 ~ "Normaal gehoor",
q7_2_2 == 2 ~ "Verminderd of geen gehoor, maar zonder gebruik van hoortoestel/implantaat",
q7_2_2 == 3 ~ "Met hoortoestel/ gehoorapparaat",
q7_2_2 == 4 ~ "Met implantaat (vb. cochleair implantaat, middenoorimplantaat)"),
level = c("Normaal gehoor", "Verminderd of geen gehoor, maar zonder gebruik van hoortoestel/implantaat", "Met hoortoestel/ gehoorapparaat", "Met implantaat (vb. cochleair implantaat, middenoorimplantaat)" )))
# Hearing problem
GGB <-GGB %>%
mutate(q7_3 = factor(case_when(
q7_3 == 1 ~ "Ik heb nooit gehoorproblemen",
q7_3 == 2 ~ "Ik heb een (licht) gehoorprobleem of twijfel of ik er een heb"),
level = c("Ik heb nooit gehoorproblemen", "Ik heb een (licht) gehoorprobleem of twijfel of ik er een heb")))
# Tinnitus
GGB <-GGB %>%
mutate(q7_5 = factor(case_when(
q7_5 == 1 ~ "Ja",
q7_5 == 2 ~ "Ik weet het niet",
q7_5 == 3 ~ "Nee"),
level = c("Ja", "Ik weet het niet", "Nee")))
GGB %>%
mutate(stad_grouped= ifelse(stad %in% c("Antwerpen", "Gent", "Leuven"), stad, "Other")) %>%
tabyl(stad_grouped) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
stad_grouped | n | percent |
|---|---|---|
Antwerpen | 1,759 | 0.17350562 |
Gent | 1,317 | 0.12990728 |
Leuven | 560 | 0.05523772 |
Other | 6,502 | 0.64134938 |
Total | 10,138 | 1.00000000 |
GGB %>%
tabyl(age_blv, gender) %>%
adorn_totals(where = c("col", "row"))%>%
flextable() %>%
add_header_row(values = c("Leeftijdsklasse", "Aantal steekproefpersonen"),
colwidths = c(1, 5)) %>%
theme_vanilla() %>%
align(align = 'center', part = 'all')
Leeftijdsklasse | Aantal steekproefpersonen | ||||
|---|---|---|---|---|---|
age_blv | Man | Vrouw | Niet-binaire geslachtsidentiteit | Ik antwoord liever niet | Total |
17-30 | 248 | 506 | 8 | 12 | 774 |
31-45 | 1,095 | 1,733 | 12 | 23 | 2,863 |
46-60 | 1,358 | 1,884 | 8 | 20 | 3,270 |
61+ | 1,774 | 1,438 | 1 | 18 | 3,231 |
Total | 4,475 | 5,561 | 29 | 73 | 10,138 |
tibble(
Statistic = c("Mean", "Median", "Min", "Max"),
Cohabitants_Adults = c(
round(mean(GGB$cohabitants_adults, na.rm = TRUE), 2),
round(median(GGB$cohabitants_adults, na.rm = TRUE), 2),
round(min(GGB$cohabitants_adults, na.rm = TRUE), 2),
round(max(GGB$cohabitants_adults, na.rm = TRUE), 0)),
Cohabitants_Children = c(
round(mean(GGB$cohabitants_children, na.rm = TRUE), 2),
round(median(GGB$cohabitants_children, na.rm = TRUE), 2),
round(min(GGB$cohabitants_children, na.rm = TRUE), 2),
round(max(GGB$cohabitants_children, na.rm = TRUE), 0)),
`P-value (t-test)` = c(format(t.test(GGB$cohabitants_adults, GGB$cohabitants_children)$p.value, digits = 3), rep("", 3)) # Replicate p-value for each row
) %>%
flextable() %>%
theme_vanilla() %>%
align(align = 'center', part = 'all')
Statistic | Cohabitants_Adults | Cohabitants_Children | P-value (t-test) |
|---|---|---|---|
Mean | 2.08 | 0.54 | 0 |
Median | 2.00 | 0.00 | |
Min | 0.00 | 0.00 | |
Max | 250.00 | 10.00 |
# Create a box plot for comparison -> doesn't work well
boxplot(GGB$cohabitants_adults, GGB$cohabitants_children,
names = c("cohabitants_adults", "cohabitants_children"),
main = "Boxplot Comparison of cohabitants_adults and cohabitants_children",
ylab = "Values",
col = c("#FCB8B6", "#F72C26"),
outline = FALSE)
GGB %>%
tabyl(nationality) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
nationality | n | percent |
|---|---|---|
Belgian National | 9,764 | 0.96310909 |
Not Belgian National | 374 | 0.03689091 |
Total | 10,138 | 1.00000000 |
GGB %>%
tabyl(education) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
education | n | percent | valid_percent |
|---|---|---|---|
Geen | 18 | 0.001775498 | 0.001794080 |
Lager onderwijs | 54 | 0.005326494 | 0.005382239 |
Lager secundair onderwijs (getuigschrift 4de jaar) | 283 | 0.027914776 | 0.028206917 |
Hoger secundair onderwijs | 1,862 | 0.183665417 | 0.185587561 |
Niet-universitair hoger onderwijs / bachelor | 3,608 | 0.355888735 | 0.359613276 |
Universitair onderwijs / master / doctoraat | 4,208 | 0.415072006 | 0.419415927 |
Ik antwoord liever niet / Geen antwoord | 0 | 0.000000000 | 0.000000000 |
105 | 0.010357072 | ||
Total | 10,138 | 1.000000000 | 1.000000000 |
GGB %>%
tabyl(employment) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
employment | n | percent |
|---|---|---|
Anders | 389 | 0.03837049 |
Ik ben arbeidsongeschikt (ziekte, invaliditeit) | 536 | 0.05287039 |
Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen) | 2,307 | 0.22755968 |
Ik ben huisvrouw of huisman | 126 | 0.01242849 |
Ik ben scholier of student | 203 | 0.02002367 |
Ik ben werkzoekend | 127 | 0.01252713 |
Ik werk als zelfstandige, zelfstandig helper of als ondernemer | 872 | 0.08601302 |
Ik werk deeltijds in loondienst | 1,422 | 0.14026435 |
Ik werk voltijds in loondienst | 4,156 | 0.40994279 |
Total | 10,138 | 1.00000000 |
GGB %>%
tabyl(income) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
income | n | percent |
|---|---|---|
0 – 1.000 € per maand | 32 | 0.003156441 |
1.001 – 2.000 € per maand | 783 | 0.077234168 |
2.001 – 3.000 € per maand | 1,610 | 0.158808443 |
3.001 – 4.000 € per maand | 1,674 | 0.165121326 |
4.001 – 5.000 € per maand | 1,867 | 0.184158611 |
5.001 – 6.000 € per maand | 1,309 | 0.129118169 |
meer dan 6.000 € per maand | 1,045 | 0.103077530 |
Ik antwoord liever niet | 1,616 | 0.159400276 |
Unknown | 202 | 0.019925035 |
Total | 10,138 | 1.000000000 |
GGB %>%
tabyl(dwelling_type) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
dwelling_type | n | percent |
|---|---|---|
Andere | 225 | 0.02219373 |
Appartement/studio | 2,132 | 0.21029789 |
Halfopen bebouwing | 1,869 | 0.18435589 |
Open bebouwing | 2,654 | 0.26178733 |
Rijwoning met tuin | 2,862 | 0.28230420 |
Rijwoning zonder tuin | 396 | 0.03906096 |
Total | 10,138 | 1.00000000 |
GGB %>%
mutate(actiecomite_geluid = case_when(
actiecomite_geluid == 29 ~ "Yes",
actiecomite_geluid == 30 ~ "No"
)) %>%
tabyl(actiecomite_geluid) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
actiecomite_geluid | n | percent |
|---|---|---|
No | 9,856 | 0.97218386 |
Yes | 282 | 0.02781614 |
Total | 10,138 | 1.00000000 |
GGB %>%
tabyl(quality_life) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
quality_life | n | percent |
|---|---|---|
Helemaal niet tevreden | 477 | 0.04705070 |
Niet tevreden | 1,647 | 0.16245808 |
Noch tevreden, noch ontevreden | 2,024 | 0.19964490 |
Tevreden | 4,993 | 0.49250345 |
Zeer tevreden | 997 | 0.09834287 |
Total | 10,138 | 1.00000000 |
library(tidyverse)
# Create new data set with noise_annoyance and other variable, then reshape data
data3 <- GGB %>%
select(noise_annoyance,
employment,
dwelling_type,
gender,
age_blv,
income_binary) %>%
gather('var', 'value', -noise_annoyance) %>%
mutate(noise_annoyance = factor(noise_annoyance, levels = 1:5,
labels = c('Helemaal niet gehinderd', 'Niet gehinderd',
'Tamelijk gehinderd', 'Ernstig gehinderd','Extreem gehinderd')))
## Warning: attributes are not identical across measure variables; they will be
## dropped
# vertical data
data_vertical <- data3 %>% filter(var %in% c("gender", "age_blv", "income_binary"))
# Vertical bar plot
ggplot(data_vertical, aes(x = value, fill = noise_annoyance)) +
geom_bar(position = 'fill') +
facet_wrap(~ var, scales = 'free_x') +
scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A")) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank()
) +
labs(
x = "Value",
y = "Proportion",
title = "Noise Annoyance - Gender, Age, Income"
)
# Employment bar plot
data_employment <- data3 %>% filter(var=="employment")
ggplot(data_employment, aes(x = value, fill = noise_annoyance)) +
geom_bar(position = 'fill') +
facet_wrap(~ var, scales = 'free_y') +
coord_flip() +
scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A")) +
theme_minimal() +
theme(
axis.text.y = element_text(size = 9),
legend.title = element_blank(),
legend.position = "bottom"
) +
labs(
x = "Value",
y = "Proportion",
title = "Noise Annoyance by Employment"
)+
guides(fill=guide_legend(nrow=3))
# Dwelling Type bar plot
data_dwelling <- data3 %>% filter(var=="dwelling_type")
ggplot(data_dwelling, aes(x = value, fill = noise_annoyance)) +
geom_bar(position = 'fill') +
facet_wrap(~ var, scales = 'free_y') +
coord_flip() +
scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A")) +
theme_minimal() +
theme(
axis.text.y = element_text(size = 9),
legend.title = element_blank()
) +
labs(
x = "Value",
y = "Proportion",
title = "Noise Annoyance by Dwelling Type"
)
GGB %>%
select(noise_annoyance_cat,
age_blv,income, nationality, dwelling_type, employment) %>%
tbl_summary(by = noise_annoyance_cat) %>%
add_p()
## The following errors were returned during `add_p()`:
## âś– For variable `dwelling_type` (`noise_annoyance_cat`) and "estimate",
## "p.value", "conf.low", and "conf.high" statistics: FEXACT error 5. The hash
## table key cannot be computed because the largest key is larger than the
## largest representable int. The algorithm cannot proceed. Reduce the
## workspace, consider using 'simulate.p.value=TRUE' or another algorithm.
## âś– For variable `employment` (`noise_annoyance_cat`) and "estimate", "p.value",
## "conf.low", and "conf.high" statistics: FEXACT error 5. The hash table key
## cannot be computed because the largest key is larger than the largest
## representable int. The algorithm cannot proceed. Reduce the workspace,
## consider using 'simulate.p.value=TRUE' or another algorithm.
## âś– For variable `income` (`noise_annoyance_cat`) and "estimate", "p.value",
## "conf.low", and "conf.high" statistics: FEXACT error 5. The hash table key
## cannot be computed because the largest key is larger than the largest
## representable int. The algorithm cannot proceed. Reduce the workspace,
## consider using 'simulate.p.value=TRUE' or another algorithm.
| Characteristic | Ernstig gehinderd N = 2,8101 |
Extreem gehinderd N = 8161 |
Helemaal niet gehinderd N = 2191 |
Niet gehinderd N = 1,7201 |
Tamelijk gehinderd N = 4,5731 |
p-value2 |
|---|---|---|---|---|---|---|
| age_blv | <0.001 | |||||
| Â Â Â Â 17-30 | 122 (4.3%) | 44 (5.4%) | 22 (10%) | 216 (13%) | 370 (8.1%) | |
| Â Â Â Â 31-45 | 764 (27%) | 234 (29%) | 43 (20%) | 481 (28%) | 1,341 (29%) | |
| Â Â Â Â 46-60 | 991 (35%) | 316 (39%) | 58 (26%) | 488 (28%) | 1,417 (31%) | |
| Â Â Â Â 61+ | 933 (33%) | 222 (27%) | 96 (44%) | 535 (31%) | 1,445 (32%) | |
| income | ||||||
|     0 – 1.000 € per maand | 9 (0.3%) | 2 (0.2%) | 1 (0.5%) | 9 (0.5%) | 11 (0.2%) | |
|     1.001 – 2.000 € per maand | 235 (8.4%) | 110 (13%) | 17 (7.8%) | 99 (5.8%) | 322 (7.0%) | |
|     2.001 – 3.000 € per maand | 505 (18%) | 149 (18%) | 38 (17%) | 212 (12%) | 706 (15%) | |
|     3.001 – 4.000 € per maand | 486 (17%) | 120 (15%) | 32 (15%) | 255 (15%) | 781 (17%) | |
|     4.001 – 5.000 € per maand | 478 (17%) | 105 (13%) | 34 (16%) | 377 (22%) | 873 (19%) | |
|     5.001 – 6.000 € per maand | 311 (11%) | 94 (12%) | 26 (12%) | 250 (15%) | 628 (14%) | |
|     meer dan 6.000 € per maand | 242 (8.6%) | 61 (7.5%) | 33 (15%) | 218 (13%) | 491 (11%) | |
| Â Â Â Â Ik antwoord liever niet | 478 (17%) | 161 (20%) | 36 (16%) | 276 (16%) | 665 (15%) | |
| Â Â Â Â Unknown | 66 (2.3%) | 14 (1.7%) | 2 (0.9%) | 24 (1.4%) | 96 (2.1%) | |
| nationality | 0.4 | |||||
| Â Â Â Â Belgian National | 2,691 (96%) | 785 (96%) | 213 (97%) | 1,659 (96%) | 4,416 (97%) | |
| Â Â Â Â Not Belgian National | 119 (4.2%) | 31 (3.8%) | 6 (2.7%) | 61 (3.5%) | 157 (3.4%) | |
| dwelling_type | ||||||
| Â Â Â Â Andere | 65 (2.3%) | 28 (3.4%) | 4 (1.8%) | 42 (2.4%) | 86 (1.9%) | |
| Â Â Â Â Appartement/studio | 685 (24%) | 220 (27%) | 26 (12%) | 257 (15%) | 944 (21%) | |
| Â Â Â Â Halfopen bebouwing | 476 (17%) | 137 (17%) | 35 (16%) | 340 (20%) | 881 (19%) | |
| Â Â Â Â Open bebouwing | 641 (23%) | 199 (24%) | 123 (56%) | 573 (33%) | 1,118 (24%) | |
| Â Â Â Â Rijwoning met tuin | 835 (30%) | 192 (24%) | 29 (13%) | 453 (26%) | 1,353 (30%) | |
| Â Â Â Â Rijwoning zonder tuin | 108 (3.8%) | 40 (4.9%) | 2 (0.9%) | 55 (3.2%) | 191 (4.2%) | |
| employment | ||||||
| Â Â Â Â Anders | 118 (4.2%) | 45 (5.5%) | 7 (3.2%) | 47 (2.7%) | 172 (3.8%) | |
| Â Â Â Â Ik ben arbeidsongeschikt (ziekte, invaliditeit) | 181 (6.4%) | 97 (12%) | 7 (3.2%) | 65 (3.8%) | 186 (4.1%) | |
| Â Â Â Â Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen) | 644 (23%) | 150 (18%) | 70 (32%) | 394 (23%) | 1,049 (23%) | |
| Â Â Â Â Ik ben huisvrouw of huisman | 45 (1.6%) | 11 (1.3%) | 1 (0.5%) | 16 (0.9%) | 53 (1.2%) | |
| Â Â Â Â Ik ben scholier of student | 30 (1.1%) | 12 (1.5%) | 7 (3.2%) | 63 (3.7%) | 91 (2.0%) | |
| Â Â Â Â Ik ben werkzoekend | 41 (1.5%) | 13 (1.6%) | 2 (0.9%) | 19 (1.1%) | 52 (1.1%) | |
| Â Â Â Â Ik werk als zelfstandige, zelfstandig helper of als ondernemer | 246 (8.8%) | 86 (11%) | 23 (11%) | 120 (7.0%) | 397 (8.7%) | |
| Â Â Â Â Ik werk deeltijds in loondienst | 405 (14%) | 102 (13%) | 27 (12%) | 250 (15%) | 638 (14%) | |
| Â Â Â Â Ik werk voltijds in loondienst | 1,100 (39%) | 300 (37%) | 75 (34%) | 746 (43%) | 1,935 (42%) | |
| 1 n (%) | ||||||
| 2 Pearson’s Chi-squared test | ||||||
GGB %>%
tabyl(q2_7_5) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
q2_7_5 | n | percent | valid_percent |
|---|---|---|---|
1 | 2,630 | 0.25942 | 1 |
7,508 | 0.74058 | ||
Total | 10,138 | 1.00000 | 1 |
nature <- c("nature_birds", "nature_poultry", "nature_crickets", "nature_frogs", "nature_trees", "nature_water", "nature_animals", "nature_other", "nature_none")
# Recode 0 to "No" and 1 to "Yes" for nature variables
GGB[nature] <- lapply(GGB[nature], function(x) {
factor(case_when(
x == 1 ~ "Yes",
x == 0 ~ "No",
TRUE ~ "Unknown"
), levels = c("Unknown", "No", "Yes"))
})
# Prepare data for plotting
nature_data <- GGB[nature] %>%
mutate_all(as.factor) %>%
pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
group_by(Variable, Response) %>%
dplyr::summarise(n = n(), .groups = "drop") %>%
group_by(Variable) %>%
mutate(percent = n / sum(n) * 100)
# Plot 100% stacked bar chart
ggplot(nature_data, aes(x = Variable, y = percent, fill = Response)) +
geom_bar(stat = "identity", width = 0.7) +
geom_text(aes(label = ifelse(Response == "Yes", paste0(round(percent, 1), "%"), "")),
position = position_stack(vjust = 1),
color = "white", size = 3.5, fontface = "bold") +
scale_fill_manual(values = c("Unknow" = "#FDE0DF", "No" = "#F67D79", "Yes" = "#B21F1A")) +
labs(title = "Bar Chart of Nature Variables",
x = NULL, y = "Percentage", fill = "Response") +
theme_minimal(base_size = 14) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
GGB %>%
tabyl(bewustzijn_eigen_geluid_cat) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
bewustzijn_eigen_geluid_cat | n | percent |
|---|---|---|
Always | 3,648 | 0.35983429 |
Often | 4,494 | 0.44328270 |
Sometimes | 1,378 | 0.13592425 |
Rarely | 468 | 0.04616295 |
Never | 150 | 0.01479582 |
Total | 10,138 | 1.00000000 |
GGB %>%
tabyl(floor_bedroom) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
floor_bedroom | n | percent |
|---|---|---|
Ondergronds | 11 | 0.001085027 |
Gelijkvloers | 1,128 | 0.111264549 |
1ste verdieping | 6,168 | 0.608404024 |
2de verdieping | 1,702 | 0.167883212 |
3de verdieping | 453 | 0.044683370 |
4de verdieping | 187 | 0.018445453 |
5de verdieping | 103 | 0.010159795 |
6de verdieping | 49 | 0.004833300 |
7de verdieping | 41 | 0.004044190 |
8de verdieping | 0 | 0.000000000 |
Hoger dan 8ste verdieping | 75 | 0.007397909 |
Unknown | 221 | 0.021799171 |
Total | 10,138 | 1.000000000 |
GGB %>%
tabyl(window_street) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
window_street | n | percent |
|---|---|---|
Yes | 4,623 | 0.456007102 |
No | 5,430 | 0.535608601 |
Unknown | 85 | 0.008384297 |
Total | 10,138 | 1.000000000 |
GGB %>%
tabyl(window_night_num) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
window_night_num | n | percent |
|---|---|---|
Always closed | 3,122 | 0.3079503 |
Mostly closed | 3,444 | 0.3397120 |
Mostly open | 2,433 | 0.2399882 |
Always open | 1,139 | 0.1123496 |
Total | 10,138 | 1.0000000 |
GGB %>%
tabyl(q8_7) %>%
adorn_totals() %>%
setNames(c("Noise influence on closing windows at night", "n", "percent")) %>%
flextable() %>%
theme_vanilla() %>%
autofit()
Noise influence on closing windows at night | n | percent |
|---|---|---|
Yes, this is the main reason | 5,477 | 0.5402446 |
Yes, but due to other reasons | 1,980 | 0.1953048 |
No | 2,681 | 0.2644506 |
Total | 10,138 | 1.0000000 |
hist(GGB$total_hyperacusis,
col = "#F67D79",
main = "Distribution of Hyperacusis",
xlab = "Total of Hyperacusis")
hist(GGB$wns_sum_score,
col = "#F67D79",
main = "Distribution of WNS",
xlab = "Total score of WNS")
table(is.na(GGB$sleep_duration_day))
##
## FALSE
## 10138
# Reshape the data to long format
sleep_data <- GGB %>%
select(sleep_duration_day, sleep_duration_night) %>%
pivot_longer(cols = everything(), names_to = "Period", values_to = "Duration") %>%
group_by(Period, Duration) %>%
dplyr::summarise(n = n(), .groups = "drop") %>%
group_by(Period) %>%
mutate(percent = n / sum(n) * 100)
# Plot
ggplot(sleep_data, aes(x = Period, y = percent, fill = Duration)) +
geom_bar(stat = "identity", width = 0.7) +
geom_text(aes(label = ifelse(percent > 5, paste0(round(percent, 1), "%"), "")),
position = position_stack(vjust = 0.5),
size = 3, color = "white") +
coord_flip() +
scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#ffa9a9", "#F67D79", "#F72C26", "#B21F1A","#7E2811","#780E00" )) +
labs(title = "Distribution of Sleep Duration (Day vs. Night)",
x = NULL, y = "Percentage", fill = "Sleep Duration") +
theme_minimal(base_size = 13)
# Define all possible 8 categories
categories <- sort(unique(na.omit(c(GGB$sleep_duration_day, GGB$sleep_duration_night))))
# Calculate frequencies for each variable
day_tab <- GGB %>%
tabyl(sleep_duration_day) %>%
adorn_totals("row") %>%
dplyr::rename(category = sleep_duration_day,
n_day = n,
percent_day = percent)
night_tab <- GGB %>%
tabyl(sleep_duration_night) %>%
adorn_totals("row") %>%
dplyr::rename(category = sleep_duration_night,
n_night = n, percent_night = percent)
# Merge into one table
full_join(day_tab, night_tab, by = "category") %>%
filter(category != "Total") %>% # remove total if not needed
mutate(
percent_day = round(percent_day * 100, 2),
percent_night = round(percent_night * 100, 2)) %>%
flextable() %>%
theme_vanilla() %>%
autofit()
category | n_day | percent_day | n_night | percent_night |
|---|---|---|---|---|
0-2 uur | 2,283 | 22.52 | 10 | 0.10 |
10-12 uur | 5 | 0.05 | 55 | 0.54 |
2-4 uur | 88 | 0.87 | 81 | 0.80 |
4-6 uur | 56 | 0.55 | 1,412 | 13.93 |
6-8 uur | 211 | 2.08 | 6,820 | 67.27 |
8-10 uur | 65 | 0.64 | 1,744 | 17.20 |
>12 uur | 1 | 0.01 | 6 | 0.06 |
Niet | 7,429 | 73.28 | 10 | 0.10 |
table(GGB$fall_asleep)
##
## >120 min 0-15 min 16-30 min 31-45 min 46-60 min 61-120 min
## 76 4177 3445 1429 669 342
ggplot(GGB, aes(x = "Fall asleep", fill = fall_asleep)) +
geom_bar(position = "fill", width = 0.5) +
geom_text(stat = "count", aes(label = paste0(round((..count..) / sum(..count..) * 100, 1), "%")),
position = position_fill(vjust = 0.5), color = "white", size = 4) +
scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#ffa9a9", "#F67D79", "#F72C26", "#B21F1A","#7E2811","#780E00" )) +
labs(x = NULL, y = "Percentage", fill = "Time to fall asleep",
title = "Time Taken to Fall Asleep") +
theme_minimal(base_size = 14)
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
sleep_disturb <- c("sleep_disturb_road", "sleep_disturb_train", "sleep_disturb_tram", "sleep_disturb_air", "sleep_disturb_neighbor")
# Recode
GGB[sleep_disturb] <- lapply(GGB[sleep_disturb], function(x) {
factor(case_when(
x == 1 ~ "Nooit",
x == 2 ~ "Minder dan jaarlijks ",
x == 3 ~ "Een of meerdere keren per jaar, maar niet maandelijks ",
x == 4 ~ "Een of meerdere keren per maand, maar niet wekelijks ",
x == 5 ~ "Een of meerdere keren per week, maar niet dagelijks ",
x == 6 ~ "Elke nacht",
TRUE ~ "Unknown"
), levels = c("Unknown", "Nooit", "Minder dan jaarlijks ", "Een of meerdere keren per week, maar niet dagelijks ","Een of meerdere keren per maand, maar niet wekelijks ", "Een of meerdere keren per jaar, maar niet maandelijks ", "Elke nacht"))
})
# Prepare data for plotting
sleep_disturb_data <- GGB[sleep_disturb] %>%
mutate_all(as.factor) %>%
pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
group_by(Variable, Response) %>%
dplyr::summarise(n = n(), .groups = "drop") %>%
group_by(Variable) %>%
mutate(percent = n / sum(n) * 100)
# Plot 100% stacked bar chart
ggplot(sleep_disturb_data, aes(x = Variable, y = percent, fill = Response)) +
geom_bar(stat = "identity", width = 0.7) +
geom_text(aes(label = ifelse(Response == "Yes", paste0(round(percent, 1), "%"), "")),
position = position_stack(vjust = 1),
color = "white", size = 3.5, fontface = "bold") +
scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A","#7E2811","#780E00" )) +
labs(title = "Bar Chart of Nature Variables",
x = NULL, y = "Percentage", fill = "Response") +
theme_minimal(base_size = 14) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplot(GGB, aes(x = gender, y = total_ess, fill = gender)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "anova", label = "p.format",
label.x = 4.45,
label.y = 5.5) + # Add test result
scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
labs(
title = "Distribution of Total ESS by Gender",
x = "Gender",
y = "Total ESS Score"
) +
theme_minimal() +
theme(legend.position = "none")
Based on the one-way ANOVA (p = 0.99), there is no evidence of a statistically significant difference in total ESS scores across gender group
ggplot(GGB, aes(x = age_blv, y = total_ess, fill = age_blv)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "anova", label = "p.format",
label.x = 4.45,
label.y = 5.5) + # Add test result
scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
labs(
title = "Distribution of Total ESS by Age Group",
x = "Age group",
y = "Total ESS Score"
) +
theme_minimal() +
theme(legend.position = "none")
Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total ESS scores across age group
ggplot(GGB %>% filter(!is.na(income_binary)), aes(x = income_binary, y = total_ess, fill = income_binary)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "wilcox.test", label = "p.format",
label.x = 2.45,
label.y = 5.5) + # Add test result
scale_fill_manual(values = c( "#F67D79", "#780E00"))+
labs(
title = "Distribution of Total ESS by Income",
x = "Income",
y = "Total ESS Score"
) +
theme_minimal() +
theme(legend.position = "none")
Based on the Wilcoxon test (p = 0.72), there is no evidence of a statistically significant difference in total ESS scores across income group
ggplot(GGB, aes(x = dwelling_type, y = total_ess, fill = dwelling_type)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "anova", label = "p.format",
label.x = 6.40,
label.y = 5.5) + # Add test result
scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A","#7E2811" ))+
labs(
title = "Distribution of Total ESS by Dwelling Type",
x = "Dwelling Type",
y = "Total ESS Score"
) +
theme_minimal() +
theme(legend.position = "none")
Based on the anova test (p = 0.35), there is no evidence of a statistically significant difference in total ESS scores across dwelling type.
table(GGB$employment)
##
## Anders
## 389
## Ik ben arbeidsongeschikt (ziekte, invaliditeit)
## 536
## Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen)
## 2307
## Ik ben huisvrouw of huisman
## 126
## Ik ben scholier of student
## 203
## Ik ben werkzoekend
## 127
## Ik werk als zelfstandige, zelfstandig helper of als ondernemer
## 872
## Ik werk deeltijds in loondienst
## 1422
## Ik werk voltijds in loondienst
## 4156
ggplot(GGB, aes(x = employment, y = total_ess, fill = employment)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "anova", label = "p.format",
label.x = 9.35,
label.y = 4.5) + # Add test result
labs(
title = "Distribution of Total ESS by Employment",
x = "Employment",
y = "Total ESS Score"
) +
theme_minimal() +
theme(legend.position = "none")
ggplot(GGB, aes(x = gender, y = total_fas, fill = gender)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "anova", label = "p.format",
label.x = 4.45,
label.y = 5.5) + # Add test result
scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
labs(
title = "Distribution of Total fas by Gender",
x = "Gender",
y = "Total fas Score"
) +
theme_minimal() +
theme(legend.position = "none")
Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total fas scores across gender group
ggplot(GGB, aes(x = age_blv, y = total_fas, fill = age_blv)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "anova", label = "p.format",
label.x = 4.45,
label.y = 7.5) + # Add test result
scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
labs(
title = "Distribution of Total fas by Age Group",
x = "Age group",
y = "Total fas Score"
) +
theme_minimal() +
theme(legend.position = "none")
Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total fas scores across age group
ggplot(GGB %>% filter(!is.na(income_binary)), aes(x = income_binary, y = total_fas, fill = income_binary)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "t.test", label = "p.format",
label.x = 2.45,
label.y = 7.5) + # Add test result
scale_fill_manual(values = c( "#F67D79", "#780E00"))+
labs(
title = "Distribution of Total fas by Income",
x = "Income",
y = "Total fas Score"
) +
theme_minimal() +
theme(legend.position = "none")
Based on the Wilcoxon test (p < 0.001), there is a statistically significant difference in total fas scores across income group
ggplot(GGB, aes(x = dwelling_type, y = total_fas, fill = dwelling_type)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "anova", label = "p.format",
label.x = 6.40,
label.y = 5.5) + # Add test result
scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A","#7E2811" ))+
labs(
title = "Distribution of Total fas by Dwelling Type",
x = "Dwelling Type",
y = "Total fas Score"
) +
theme_minimal() +
theme(legend.position = "none")
Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total fas scores across dwelling type.
table(GGB$employment)
##
## Anders
## 389
## Ik ben arbeidsongeschikt (ziekte, invaliditeit)
## 536
## Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen)
## 2307
## Ik ben huisvrouw of huisman
## 126
## Ik ben scholier of student
## 203
## Ik ben werkzoekend
## 127
## Ik werk als zelfstandige, zelfstandig helper of als ondernemer
## 872
## Ik werk deeltijds in loondienst
## 1422
## Ik werk voltijds in loondienst
## 4156
ggplot(GGB, aes(x = employment, y = total_fas, fill = employment)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "anova", label = "p.format",
label.x = 9.35,
label.y = 4.5) + # Add test result
labs(
title = "Distribution of Total fas by Employment",
x = "Employment",
y = "Total fas Score"
) +
theme_minimal() +
theme(legend.position = "none")
ggplot(GGB, aes(x = gender, y = pss_total, fill = gender)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "anova", label = "p.format",
label.x = 4.45,
label.y = 5.5) + # Add test result
scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
labs(
title = "Distribution of Total stress by Gender",
x = "Gender",
y = "Total stress Score"
) +
theme_minimal() +
theme(legend.position = "none")
Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total stress scores across gender group
ggplot(GGB, aes(x = age_blv, y = pss_total, fill = age_blv)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "anova", label = "p.format",
label.x = 4.45,
label.y = 5.5) + # Add test result
scale_fill_manual(values = c("#FDE0DF", "#F67D79", "#F72C26", "#780E00"))+
labs(
title = "Distribution of Total stress by Age Group",
x = "Age group",
y = "Total stress Score"
) +
theme_minimal() +
theme(legend.position = "none")
Based on the one-way ANOVA (p < 0.001), there is a statistically significant difference in total stress scores across age group
ggplot(GGB %>% filter(!is.na(income_binary)), aes(x = income_binary, y = pss_total, fill = income_binary)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "wilcox.test", label = "p.format",
label.x = 2.45,
label.y = 5.5) + # Add test result
scale_fill_manual(values = c( "#F67D79", "#780E00"))+
labs(
title = "Distribution of Total stress by Income",
x = "Income",
y = "Total stress Score"
) +
theme_minimal() +
theme(legend.position = "none")
Based on the Wilcoxon (p < 0.001), there is a statistically significant difference in total stress scores across income group
ggplot(GGB, aes(x = dwelling_type, y = pss_total, fill = dwelling_type)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "anova", label = "p.format",
label.x = 6.40,
label.y = 5.5) + # Add test result
scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F67D79", "#F72C26", "#B21F1A","#7E2811" ))+
labs(
title = "Distribution of Total stress by Dwelling Type",
x = "Dwelling Type",
y = "Total stress Score"
) +
theme_minimal() +
theme(legend.position = "none")
Based on the anova test (p < 0.001), there is a statistically significant difference in total stress scores across dwelling type.
table(GGB$employment)
##
## Anders
## 389
## Ik ben arbeidsongeschikt (ziekte, invaliditeit)
## 536
## Ik ben gepensioneerd (ook vervroegd pensioen of brugpensioen)
## 2307
## Ik ben huisvrouw of huisman
## 126
## Ik ben scholier of student
## 203
## Ik ben werkzoekend
## 127
## Ik werk als zelfstandige, zelfstandig helper of als ondernemer
## 872
## Ik werk deeltijds in loondienst
## 1422
## Ik werk voltijds in loondienst
## 4156
ggplot(GGB, aes(x = employment, y = pss_total, fill = employment)) +
geom_violin(trim = FALSE, alpha = 0.4) + # Violin plot
geom_boxplot(width = 0.1, outlier.color = "red", outlier.size = 2) + # Boxplot inside violin
coord_flip()+
stat_compare_means(method = "anova", label = "p.format",
label.x = 9.35,
label.y = 4.5) + # Add test result
labs(
title = "Distribution of Total stress by Employment",
x = "Employment",
y = "Total stress Score"
) +
theme_minimal() +
theme(legend.position = "none")
GGB %>%
tabyl(q4_2) %>%
adorn_totals() %>%
setNames(c("General physical health status", "n", "percent")) %>%
flextable() %>%
theme_vanilla() %>%
autofit()
General physical health status | n | percent |
|---|---|---|
Heel goed | 1,471 | 0.145097652 |
Eerder goed | 4,767 | 0.470211087 |
Gemiddeld | 3,033 | 0.299171434 |
Eerder slecht | 810 | 0.079897416 |
Heel slecht | 57 | 0.005622411 |
Total | 10,138 | 1.000000000 |
physical_complaints <- c("q4_3_1", "q4_3_2", "q4_3_3", "q4_3_4", "q4_3_5", "q4_3_6", "q4_3_7", "q4_3_8")
# Recode
GGB[physical_complaints] <- lapply(GGB[physical_complaints], function(x) {
factor(case_when(
x == 1 ~ "Nooit",
x == 2 ~ "Een paar keer per jaar",
x == 3 ~ "Een paar keer per maand",
x == 4 ~ "Een paar keer per week",
x == 5 ~ "Dagelijks"
), levels = c("Nooit", "Een paar keer per jaar", "Een paar keer per maand","Een paar keer per week", "Dagelijks"))
})
# Prepare data for plotting
physical_complaints_data <- GGB[physical_complaints] %>%
mutate_all(as.factor) %>%
pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
group_by(Variable, Response) %>%
dplyr::summarise(n = n(), .groups = "drop") %>%
group_by(Variable) %>%
mutate(percent = n / sum(n) * 100)
# Relabel variable names
physical_complaints_data <- physical_complaints_data %>%
mutate(Variable = revalue(Variable,
c("q4_3_1" = "Hoofdpijn",
"q4_3_2" = "Duizeligheid",
"q4_3_3" = "Hartkloppingen",
"q4_3_4" = "Maag- en darmklachten",
"q4_3_5" = "Slaperigheid",
"q4_3_6" = "Vermoeidheid",
"q4_3_7" = "Slapeloosheid",
"q4_3_8" = "Psychische stoornissen"
)))
## The following `from` values were not present in `x`: q4_3_2, q4_3_3, q4_3_4, q4_3_5, q4_3_6, q4_3_7, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_3, q4_3_4, q4_3_5, q4_3_6, q4_3_7, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_4, q4_3_5, q4_3_6, q4_3_7, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_5, q4_3_6, q4_3_7, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_4, q4_3_6, q4_3_7, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_4, q4_3_5, q4_3_7, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_4, q4_3_5, q4_3_6, q4_3_8
## The following `from` values were not present in `x`: q4_3_1, q4_3_2, q4_3_3, q4_3_4, q4_3_5, q4_3_6, q4_3_7
# Plot 100% stacked bar chart
ggplot(physical_complaints_data, aes(x = Variable, y = percent, fill = Response)) +
geom_bar(stat = "identity", width = 0.7) +
geom_text(aes(label = ifelse(Response == "Yes", paste0(round(percent, 1), "%"), "")),
position = position_stack(vjust = 1),
color = "white", size = 3.5, fontface = "bold") +
scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F72C26", "#B21F1A","#7E2811" )) +
labs(title = "Bar Chart of Physical complaints",
x = NULL, y = "Percentage", fill = "Response") +
theme_minimal(base_size = 14) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
freq <- table(GGB$q4_4)
pct <- round(100 * freq / sum(freq), 1)
labels <- paste0(names(freq), ": ", freq, " (", pct, "%)")
pie(freq, labels = labels, main = "Pie chart of chronic condition", col= c("#FDE0DF", "#F72C26"))
chronic_disorders <- c("q4_5_1", "q4_5_2", "q4_5_3", "q4_5_4", "q4_5_5", "q4_5_6", "q4_5_7")
# Prepare data for plotting
chronic_disorders_data <- GGB %>%
select(all_of(chronic_disorders)) %>%
pivot_longer(cols = everything(), names_to = "variable", values_to = "value") %>%
filter(value == 1)
summary_chronic_disorders <- aggregate(value == 1 ~ variable, data = chronic_disorders_data , sum)
# Relabel variable names
names(summary_chronic_disorders)[2] <- "frequency"
summary_chronic_disorders <- summary_chronic_disorders %>%
mutate(variable = revalue(variable,
c("q4_5_1" = "Aandoeningen van het ademhalingsstelsel
(bijvoorbeeld astma, COPD)",
"q4_5_2" = "Aandoeningen van het spijsverteringsstelsel
(bijvoorbeeld ziekte van Crohn, levercirrose)",
"q4_5_3" = "Hart- en vaataandoeningen
(bijvoorbeeld hoge bloeddruk, hartritmestoornissen)",
"q4_5_4" = "Neurologische en psychische aandoeningen
(bijvoorbeeld epilepsie, multiple sclerose)",
"q4_5_5" = "Metabole aandoeningen
(bijvoorbeeld diabetes, obesitas)",
"q4_5_6" = "Andere",
"q4_5_7" = "Slaapapneu"
))) %>%
mutate(variable = reorder(variable, frequency))
# Plot bar chart
ggplot(summary_chronic_disorders, aes(x = variable, y = frequency)) +
geom_bar(stat = "identity", width = 0.7, fill = "red") +
geom_text(aes(label = frequency), hjust = -0.1, size = 4) +
coord_flip()+
labs(title = "Bar Chart of Chronic disorders",
x = NULL, y = "Frequency") +
theme_minimal(base_size = 14) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 1))+
expand_limits(y = max(summary_chronic_disorders$frequency) * 1.1)
freq_4_6 <- table(GGB$q4_6)
pct_4_6 <- round(100 * freq_4_6 / sum(freq_4_6), 1)
labels_4_6 <- paste0(names(freq_4_6), ": ", freq_4_6, " (", pct_4_6, "%)")
pie(freq_4_6, labels = labels_4_6, main = "Pie chart of allergic condition", col= c("#FDE0DF", "#F72C26"))
ggplot(GGB %>% filter(!is.na(q4_7)), aes(x=q4_7)) +
geom_bar(width = 0.7, fill = "red")+
coord_flip()+
labs(title = "Bar graph of allergic disorders",
x = "Allergic disorders")+
theme(axis.text.x = element_text(hjust = 1),
plot.title = element_text(hjust = 1))
ggplot(data = (GGB %>%
filter(!is.na(q4_8)) %>%
filter(q4_8 >= quantile(q4_8, 0.25) - 1.5 * IQR(q4_8),
q4_8 <= quantile(q4_8, 0.75) + 1.5 * IQR(q4_8))),
aes(x = q4_8)) +
geom_histogram(binwidth = 1, fill = "red", color = "black") +
labs(title = "Histogram of Respiratory infections",
x = "Respiratory infections (times)", y = "Count") +
theme_minimal()
GGB %>%
mutate(q4_9 = case_when(
q4_9 == 1 ~ "Yes",
q4_9 == 2 ~ "No"
)) %>%
tabyl(q4_9) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
q4_9 | n | percent |
|---|---|---|
No | 9,287 | 0.91605839 |
Yes | 851 | 0.08394161 |
Total | 10,138 | 1.00000000 |
ggplot(data = (GGB %>%
filter(!is.na(q4_13)) %>%
filter(q4_13 >= quantile(q4_13, 0.25) - 1.5 * IQR(q4_13),
q4_13 <= quantile(q4_13, 0.75) + 1.5 * IQR(q4_13))),
aes(x = q4_13)) +
geom_histogram(binwidth = 1, fill = "red", color = "black") +
labs(title = "Histogram of Moderate to intense exercise",
x = "Moderate to intense exercise (hours)", y = "Count") +
theme_minimal()
ggplot(data = (GGB %>%
filter(!is.na(q4_14)) %>%
filter(q4_14 >= quantile(q4_14, 0.25) - 1.5 * IQR(q4_14),
q4_14 <= quantile(q4_14, 0.75) + 1.5 * IQR(q4_14))),
aes(x = q4_14)) +
geom_histogram(binwidth = 1, fill = "red", color = "black") +
labs(title = "Histogram of Time in outdoor environment ",
x = "Time in outdoor environment (hours)", y = "Count") +
theme_minimal()
ggplot(data = (GGB %>%
filter(!is.na(smartphone_use)) %>%
filter(smartphone_use >= quantile(smartphone_use, 0.25) - 1.5 * IQR(smartphone_use),
smartphone_use <= quantile(smartphone_use, 0.75) + 1.5 * IQR(smartphone_use))),
aes(x = smartphone_use)) +
geom_histogram(binwidth = 1, fill = "red", color = "black") +
labs(title = "Histogram of Time in using smartphone",
x = "Time in using smartphone (hours)", y = "Count") +
theme_minimal()
GGB %>%
tabyl(q4_16) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
q4_16 | n | percent |
|---|---|---|
Nooit (al dan niet in het bezit van een rijbewijs) | 1,016 | 0.10021701 |
Minder dan jaarlijks | 122 | 0.01203393 |
Een of meerdere keren per jaar, maar niet maandelijks | 490 | 0.04833300 |
Een of meerdere keren per maand, maar niet wekelijks | 1,549 | 0.15279148 |
Een of meerdere keren per week, maar niet dagelijks | 4,421 | 0.43608207 |
Dagelijks | 2,540 | 0.25054251 |
Total | 10,138 | 1.00000000 |
# Prepare data for plotting
hearing_status_data <- GGB %>%
select(c("q7_2_1", "q7_2_2")) %>%
mutate_all(as.factor) %>%
pivot_longer(cols = everything(), names_to = "Variable", values_to = "Response") %>%
group_by(Variable, Response) %>%
dplyr::summarise(n = n(), .groups = "drop") %>%
group_by(Variable) %>%
mutate(percent = n / sum(n) * 100) %>%
mutate(Variable = case_when(
Variable == "q7_2_1" ~ "Right ear",
Variable == "q7_2_2" ~ "Left ear"
))
# Plot 100% stacked bar chart
ggplot(hearing_status_data, aes(x = Variable, y = percent, fill = Response)) +
geom_bar(stat = "identity", width = 0.7) +
geom_text(aes(label = ifelse(Response == "Yes", paste0(round(percent, 1), "%"), "")),
position = position_stack(vjust = 1),
color = "white", size = 3.5, fontface = "bold") +
coord_flip()+
scale_fill_manual(values = c("#FDE0DF", "#FCB8B6", "#F72C26", "#7E2811" )) +
labs(title = "Bar Chart of Hearing Status",
x = NULL, y = "Percentage", fill = "Response") +
theme_minimal(base_size = 14) +
theme(axis.text.x = element_text(hjust = 1),
legend.position = "bottom")+
guides(fill = guide_legend(nrow = 4))
GGB %>%
tabyl(q7_3) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
q7_3 | n | percent |
|---|---|---|
Ik heb nooit gehoorproblemen | 6,520 | 0.6431249 |
Ik heb een (licht) gehoorprobleem of twijfel of ik er een heb | 3,618 | 0.3568751 |
Total | 10,138 | 1.0000000 |
GGB %>%
tabyl(q7_5) %>%
adorn_totals() %>%
flextable() %>%
theme_vanilla() %>%
autofit()
q7_5 | n | percent |
|---|---|---|
Ja | 2,660 | 0.26237917 |
Ik weet het niet | 640 | 0.06312882 |
Nee | 6,838 | 0.67449201 |
Total | 10,138 | 1.00000000 |