Packages

pacman::p_load(janitor, 
               tidyverse, 
               gtsummary, 
               patchwork, 
               reporter, 
               sjPlot, 
               scales, 
               likert)

theme_set(theme_minimal())

Dataset

df <- read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vS8G_1WOroX3Bj1FSI-aV7qcMc3bb4osbttxy-gBc7xjzYXealbfdREtx5sppFe5qteGcSc4gN15FKj/pub?gid=14193945&single=true&output=csv")

Create a new column with the year and calculate the age

cleaning the date

df %>% 
  mutate(Year_actual = format(as.Date(Datum, format="%d/%m/%Y"),"%Y")) %>% 
  relocate(Year_actual, .after = Datum) %>% 
  ggplot(aes(x = Year_actual, 
              y = `Birth year`)) + 
  geom_jitter()

Clean the age

df <- df %>% 
  mutate(Year_actual = format(as.Date(Datum, format="%d/%m/%Y"),"%Y")) %>% 
  relocate(Year_actual, .after = Datum) %>% 
  # Convert to int
  mutate(`Birth year` = as.integer(`Birth year`)) %>% 
   mutate(Year_actual = as.integer(Year_actual)) %>% 
  # calculate the age
  mutate(Age = Year_actual - `Birth year`) %>% 
  relocate(Age, .after = `Birth year`) %>% 
  select(-c(Year_actual, `Birth year`, Datum))

EDA

How many patients?

df %>% 
  janitor::tabyl(Gender) %>% 
  adorn_pct_formatting() %>% 
   adorn_totals("row")

##  Gender   n percent
##     Man  60   45.5%
##   Woman  72   54.5%
##   Total 132       -

summary(df$Age)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   21.00   28.00   32.00   32.97   37.00   61.00

Ohip by age and gender

df %>% 
  ggplot(aes(x = OHIP, 
             fill = Gender)) + 
  geom_histogram() + 
  facet_grid(. ~ Gender) + 
  labs(title = "Age distribution")

Table

df %>% 
  select(Age, Gender, OHIP) %>% 
  gtsummary::tbl_summary(by = Gender) %>% 
  gtsummary::add_p()

Characteristic	Man, N = 60¹	Woman, N = 72¹	p-value²
Age	35 (31, 38)	30 (27, 34)	<0.001
OHIP	2.2 (0.9, 6.6)	5.0 (3.6, 8.3)	<0.001
¹ Median (IQR) ² Wilcoxon rank sum test

Characteristic

Man, N = 60¹

Woman, N = 72¹

p-value²

Age

35 (31, 38)

30 (27, 34)

<0.001

OHIP

2.2 (0.9, 6.6)

5.0 (3.6, 8.3)

<0.001

¹ Median (IQR)

² Wilcoxon rank sum test

OHIP

OHIP by gender

df %>% 
  ggplot(aes(x = OHIP, 
             fill = Gender)) + 
  geom_histogram() + 
  facet_wrap(Gender ~ .)

df %>%
  ggplot(aes(y = OHIP,
             x = Gender, 
             color = Gender)) +
  geom_violin() +
  geom_boxplot(width = .3) +
  theme(legend.position = "none")  +
  labs(title = "OHIP by Gender")

Regression analysis

df %>% 
  with(glm(OHIP ~ Age + Gender))  %>% 
  gtsummary::tbl_regression()

Characteristic	Beta	95% CI¹	p-value
Age	-0.08	-0.18, 0.02	0.14
Gender
Man	—	—
Woman	2.0	0.64, 3.3	0.005
¹ CI = Confidence Interval

df %>% 
  with(glm(OHIP ~ Age + Gender))  %>% 
  report::report()

## We fitted a linear model (estimated using ML) to predict OHIP with Age and Gender (formula: OHIP ~ Age + Gender). The model's explanatory power is weak (R2 = 0.09). The model's intercept, corresponding to Age = 0 and Gender = Man, is at 6.42 (95% CI [2.80, 10.03], t(129) = 3.48, p < .001). Within this model:
## 
##   - The effect of Age is statistically non-significant and negative (beta = -0.08, 95% CI [-0.18, 0.02], t(129) = -1.49, p = 0.135; Std. beta = -0.13, 95% CI [-0.30, 0.04])
##   - The effect of Gender [Woman] is statistically significant and positive (beta = 1.99, 95% CI [0.64, 3.35], t(129) = 2.88, p = 0.004; Std. beta = 0.50, 95% CI [0.16, 0.84])
## 
## Standardized parameters were obtained by fitting the model on a standardized version of the dataset. 95% Confidence Intervals (CIs) and p-values were computed using

df %>% 
  with(glm(OHIP ~ Age + Gender)) %>% 
  sjPlot::plot_model()

OHIP by age

df %>% 
  ggplot(aes(x = OHIP, 
             y = Age, 
             color = Gender)) + 
  geom_jitter() +
  labs(title = "OHIP and Age by Gender")

Analysis each question

df_long_values <- df %>% 
  select(-c(`Difficulty pronouncing words`:`Totally unable to function`)) %>% 
  pivot_longer(`Difficulty pronouncing words Weighted`:`Totally unable to function Weighted`, 
               names_to = "OHIP_item", 
               values_to = "OHIP_value_weighted")

df_long_likert <- df %>% 
  select(Gender, c(`Difficulty pronouncing words`:`Totally unable to function`)) %>%
  pivot_longer(`Difficulty pronouncing words`:`Totally unable to function`, 
               names_to = "likert_item", 
               values_to = "likert_value")

Data summary

df_long_likert_summary <- df_long_likert %>% 
  group_by(likert_item, likert_value) %>% 
  count(name = "n_answers") %>% 
  group_by(likert_item) %>% 
  mutate(percent_answers = n_answers / sum(n_answers)) %>% 
  ungroup() %>% 
  mutate(percent_answers_label = percent(percent_answers, accuracy = 1))

head(df_long_likert_summary)

## # A tibble: 6 × 5
##   likert_item          likert_value n_answers percent_answers percent_answers_l…
##   <chr>                <chr>            <int>           <dbl> <chr>             
## 1 Difficulty doing us… Fairly often         2          0.0152 2%                
## 2 Difficulty doing us… Hardly ever         29          0.220  22%               
## 3 Difficulty doing us… Never               93          0.705  70%               
## 4 Difficulty doing us… Occasionally         8          0.0606 6%                
## 5 Difficulty eating f… Fairly often         7          0.0530 5%                
## 6 Difficulty eating f… Hardly ever         34          0.258  26%

df_long_likert_summary <- df_long_likert_summary %>% 
  mutate(likert_value = fct_relevel(likert_value, 
                                  "Never", 
                                  "Hardly ever", 
                                  "Occasionally", 
                                   "Fairly often", 
                                  "Very often"))

 df_long_likert_summary %>%
  ggplot(aes(x = likert_item, 
             y = percent_answers,
             fill = likert_value)) +
  geom_col() +
  geom_text(aes(label = percent_answers_label),
            position = position_stack(vjust = 0.5),
            color = "white",
            fontface = "bold") +
  coord_flip() +
  scale_x_discrete() +
  scale_fill_viridis_d() +
  labs(title = "Title",
       x = NULL,
       fill = NULL) +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        panel.grid = element_blank(),
        legend.position = "top")

Likert plot with sjPlot

df_for_likert <- df %>% 
  # select some columns
  select(Age:`Totally unable to function`) %>% 
  
  # create an ID to not to mess with the pivot_wider afterwards
  mutate(id = row_number()) %>% 
  relocate(id, .before = Age) %>% 

  # reshape for easy wrangling
  pivot_longer(cols = -c(id, Age, Gender)) %>% 
  
  # reorder the levels
  mutate(value = fct_relevel(value, 
                                  "Never", 
                                  "Hardly ever", 
                                  "Occasionally", 
                                   "Fairly often", 
                                  "Very often")) %>% 
  # convert to a numeric variable
  # mutate(value_num = case_when(
  #  value == "Never" ~ "1", 
  #  value == "Hardly ever" ~ "2", 
  #  value == "Occasionally" ~ "3", 
  #  value == "Fairly often" ~ "4", 
  #  TRUE ~ "5"
  # )) %>% 
  # mutate(value_num = as.integer(value_num)) %>% 
  
  # select only these columns

# collapse categories
mutate(value = case_when(
  value == "Fairly often" ~ "Very often", 
  TRUE ~ as.character(as.character(value))
)) %>% 
  




  # select(id, name, value_num) %>% 
  select(id, name, value) %>% 
  # reshape for the graph
  
  pivot_wider(names_from = name, 
              values_from = value)

df_for_likert %>% 
  select(-id) %>% 
  dplyr::mutate_all(., ~ ordered(., levels = c("Very often", "Occasionally", "Hardly ever", "Never"))) %>%
 #  janitor::tabyl(`Pain experience`)


plot_likert(
  # grid.range = c(1.2, 1.4),
 # expand.grid = FALSE,
  sort.frq = "pos.desc", 
  values = "sum.outside"
  #show.prc.sign = TRUE
)

ggsave("likert_all.pdf", width = 23, height = 15, units = "cm")

Men and Women Likert

Men

df_for_likert_man <- df %>% 
  filter(Gender == "Man") %>% 
  # select some columns
  select(Age:`Totally unable to function`) %>% 
  
  # create an ID to not to mess with the pivot_wider afterwards
  mutate(id = row_number()) %>% 
  relocate(id, .before = Age) %>% 

  # reshape for easy wrangling
  pivot_longer(cols = -c(id, Age, Gender)) %>% 
  
  # reorder the levels
  mutate(value = fct_relevel(value, 
                                  "Never", 
                                  "Hardly ever", 
                                  "Occasionally", 
                                   "Fairly often", 
                                  "Very often")) %>% 
  # convert to a numeric variable
  # mutate(value_num = case_when(
  #  value == "Never" ~ "1", 
  #  value == "Hardly ever" ~ "2", 
  #  value == "Occasionally" ~ "3", 
  #  value == "Fairly often" ~ "4", 
  #  TRUE ~ "5"
  # )) %>% 
  # mutate(value_num = as.integer(value_num)) %>% 
  
  # select only these columns

# collapse categories
mutate(value = case_when(
  value == "Fairly often" ~ "Very often", 
  TRUE ~ as.character(as.character(value))
)) %>% 
  




  # select(id, name, value_num) %>% 
  select(id, name, value) %>% 
  # reshape for the graph
  
  pivot_wider(names_from = name, 
              values_from = value)

df_for_likert_man %>% 
  select(-id) %>% 
  dplyr::mutate_all(., ~ ordered(., levels = c("Very often", "Occasionally", "Hardly ever", "Never"))) %>%
 #  janitor::tabyl(`Pain experience`)


plot_likert(
  # grid.range = c(1.2, 1.4),
 # expand.grid = FALSE,
  sort.frq = "pos.desc", 
  values = "sum.outside"
  #show.prc.sign = TRUE
) +
  labs(title = "Men")

ggsave("likert_man.pdf", width = 23, height = 15, units = "cm")

Women Likert

df_for_likert_women <- df %>% 
  filter(Gender == "Woman") %>% 
  # select some columns
  select(Age:`Totally unable to function`) %>% 
  
  # create an ID to not to mess with the pivot_wider afterwards
  mutate(id = row_number()) %>% 
  relocate(id, .before = Age) %>% 

  # reshape for easy wrangling
  pivot_longer(cols = -c(id, Age, Gender)) %>% 
  
  # reorder the levels
  mutate(value = fct_relevel(value, 
                                  "Never", 
                                  "Hardly ever", 
                                  "Occasionally", 
                                   "Fairly often", 
                                  "Very often")) %>% 
  # convert to a numeric variable
  # mutate(value_num = case_when(
  #  value == "Never" ~ "1", 
  #  value == "Hardly ever" ~ "2", 
  #  value == "Occasionally" ~ "3", 
  #  value == "Fairly often" ~ "4", 
  #  TRUE ~ "5"
  # )) %>% 
  # mutate(value_num = as.integer(value_num)) %>% 
  
  # select only these columns

# collapse categories
mutate(value = case_when(
  value == "Fairly often" ~ "Very often", 
  TRUE ~ as.character(as.character(value))
)) %>% 
  




  # select(id, name, value_num) %>% 
  select(id, name, value) %>% 
  # reshape for the graph
  
  pivot_wider(names_from = name, 
              values_from = value)

df_for_likert_women %>% 
  select(-id) %>% 
  dplyr::mutate_all(., ~ ordered(., levels = c("Very often", "Occasionally", "Hardly ever", "Never"))) %>%
 #  janitor::tabyl(`Pain experience`)


plot_likert(
  # grid.range = c(1.2, 1.4),
 # expand.grid = FALSE,
  sort.frq = "pos.desc", 
  values = "sum.outside"
  #show.prc.sign = TRUE
) +
  labs(title = "Women")

ggsave("likert_woman.pdf", width = 23, height = 15, units = "cm")

Likert analysis

Packages

Dataset

cleaning the date

EDA

Ohip by age and gender

OHIP

OHIP by gender

Regression analysis

OHIP by age

Analysis each question

Likert plot with sjPlot

Men and Women Likert

Men

Women Likert