Initial Analysis

library("tidyverse")

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library("gt")
library("gapminder")
library("srvyr")

## 
## Attaching package: 'srvyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter

library("srvyrexploR")
library("fst")
library("ggridges")
library("plotly")

## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

library("DT")
library("gridExtra")

## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

library("patchwork")
library("reshape2")

## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

library("finalfit")
library("sjPlot")

## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!

library("plotly")

# Read the full ESS extract from disk (490,555 observations)
ess <- fst::read_fst(path = "All-ESS-Data.fst")

# Frequency of each eisced code; useNA = "always" adds an explicit <NA> count
table(ess[["eisced"]], useNA = "always")

## 
##     0     1     2     3     4     5     6     7    55    77    88    99  <NA> 
## 73306 38823 71917 74258 87348 49268 42651 49558  1052   483   497  1394     0

#table(ess$empl)
#table(ess$agea)
#table(ess$domicil)
# Frequency of each gndr code
table(ess[["gndr"]])

## 
##      1      2      9 
## 226554 263034    967

# Recode the raw ESS items into labelled analysis categories.
# Any code outside the listed values becomes NA.
ess_clean <- ess %>%
  mutate(
    # Drinking frequency (alcfreq), as an ordered set of levels
    alc_consumption = factor(
      case_when(
        alcfreq %in% 7 ~ "Never",
        alcfreq %in% 4:6 ~ "Monthly",
        alcfreq %in% 2:3 ~ "Weekly",
        alcfreq %in% 1 ~ "Everyday",
        .default = NA_character_
      ),
      levels = c("Never", "Monthly", "Weekly", "Everyday")
    ),
    # Household income decile (hinctnta) collapsed into four bands
    income = factor(
      case_when(
        hinctnta %in% 1 ~ "Bottom 10%",
        hinctnta %in% 2:5 ~ "11% to 50%",
        hinctnta %in% 6:9 ~ "51% to 90%",
        hinctnta %in% 10 ~ "Top 10%",
        .default = NA_character_
      ),
      levels = c("Bottom 10%", "11% to 50%", "51% to 90%", "Top 10%")
    ),
    # Age in years (agea) collapsed into four bands; under 18 is left as NA
    age = factor(
      case_when(
        agea %in% 18:29 ~ "18-29",
        agea %in% 30:39 ~ "30-39",
        agea %in% 40:49 ~ "40-49",
        agea %in% 50:123 ~ "Over 50",
        .default = NA_character_
      ),
      levels = c("18-29", "30-39", "40-49", "Over 50")
    ),
    # Gender (gndr) as a plain character label
    gender = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      .default = NA_character_
    )
  )

# Keep rows with all four recoded variables present, restrict to the four
# study countries, and collapse to one row per combination with its count `n`.
ess_clean <- ess_clean %>%
  drop_na(income, age, alc_consumption, gender) %>%
  filter(cntry %in% c("SI", "FI", "IE", "PT")) %>%
  count(alc_consumption, income, age, cntry, gender)

# Within each country, express every cell count as a share of that country's
# total (prop) and as a percentage rounded to two decimals (pct).
# The result is left grouped by cntry.
ess_countries <- ess_clean %>%
  group_by(cntry) %>%
  mutate(
    prop = n / sum(n),
    pct = round(prop * 100, digits = 2)
  )

# Income view of the per-country shares, grouped by income band and country.
# cntry is selected explicitly (as the gender/age cross-tab blocks further
# down already do) instead of relying on dplyr re-adding the grouping column
# with an "Adding missing grouping variables" message. The resulting columns
# and their order (cntry, income, n, pct, prop) are unchanged.
ess_income <- ess_countries %>%
  select(cntry, income, n, pct, prop) %>%
  group_by(income, cntry)

# Total share per income band and country, then one column per income band
# so that each country becomes a single row.
ess_income_gt <- ess_income %>%
  summarise(prop = sum(prop, na.rm = TRUE)) %>%
  pivot_wider(names_from = income, values_from = prop)

## `summarise()` has grouped output by 'income'. You can override using the
## `.groups` argument.

# Row-wise standard deviation over columns 2 to 5 (the four income-band
# columns), giving one value per row of the table.
Standard_DeviationI <- apply(ess_income_gt[, 2:5], MARGIN = 1, FUN = sd, na.rm = TRUE)

# Append the SD column, then rename all columns by position.
ess_income_gt <- cbind(ess_income_gt, Standard_DeviationI)
names(ess_income_gt) <- c(
  "cntry", "Bottom_10", "11_to_50", "51_to_90", "Top_10", "standard_deviation"
)

# Alcohol view of the per-country shares, grouped by consumption level and
# country. cntry is selected explicitly (as the gender/age cross-tab blocks
# further down already do) instead of relying on dplyr re-adding the grouping
# column with an "Adding missing grouping variables" message. The resulting
# columns and their order (cntry, alc_consumption, n, pct, prop) are unchanged.
ess_alc <- ess_countries %>%
  select(cntry, alc_consumption, n, pct, prop) %>%
  group_by(alc_consumption, cntry)

# Total share per consumption level and country, then one column per
# consumption level so that each country becomes a single row.
ess_alc_gt <- ess_alc %>%
  summarise(prop = sum(prop, na.rm = TRUE)) %>%
  pivot_wider(names_from = alc_consumption, values_from = prop)

## `summarise()` has grouped output by 'alc_consumption'. You can override using
## the `.groups` argument.

# Row-wise standard deviation over columns 2 to 5 (the four alcohol
# consumption columns), giving one value per row of the table.
Standard_DeviationA <- apply(ess_alc_gt[, 2:5], MARGIN = 1, FUN = sd, na.rm = TRUE)

# Append the SD column, then rename all columns by position.
ess_alc_gt <- cbind(ess_alc_gt, Standard_DeviationA)
names(ess_alc_gt) <- c(
  "cntry", "Never", "Monthly", "Weekly", "Everyday", "standard_deviation"
)

# Age view of the per-country shares, grouped by age band and country.
# cntry is selected explicitly (as the gender/age cross-tab blocks further
# down already do) instead of relying on dplyr re-adding the grouping column
# with an "Adding missing grouping variables" message. The resulting columns
# and their order (cntry, age, n, pct, prop) are unchanged.
ess_age <- ess_countries %>%
  select(cntry, age, n, pct, prop) %>%
  group_by(age, cntry)

# Total share per age band and country, then one column per age band so that
# each country becomes a single row.
ess_age_gt <- ess_age %>%
  summarise(prop = sum(prop, na.rm = TRUE)) %>%
  pivot_wider(names_from = age, values_from = prop)

## `summarise()` has grouped output by 'age'. You can override using the `.groups`
## argument.

# Row-wise standard deviation over columns 2 to 5 (the four age-band
# columns), giving one value per row of the table.
Standard_DeviationAge <- apply(ess_age_gt[, 2:5], MARGIN = 1, FUN = sd, na.rm = TRUE)

# Append the SD column, then rename all columns by position.
ess_age_gt <- cbind(ess_age_gt, Standard_DeviationAge)
names(ess_age_gt) <- c("cntry", "18", "30", "40", "50", "standard_deviation")

# Gender view of the per-country shares, grouped by gender and country.
# cntry is selected explicitly (as the gender/age cross-tab blocks further
# down already do) instead of relying on dplyr re-adding the grouping column
# with an "Adding missing grouping variables" message. The resulting columns
# and their order (cntry, gender, n, pct, prop) are unchanged.
ess_gender <- ess_countries %>%
  select(cntry, gender, n, pct, prop) %>%
  group_by(gender, cntry)

# Total share per gender and country, then one column per gender so that each
# country becomes a single row.
ess_gender_gt <- ess_gender %>%
  summarise(prop = sum(prop, na.rm = TRUE)) %>%
  pivot_wider(names_from = gender, values_from = prop)

## `summarise()` has grouped output by 'gender'. You can override using the
## `.groups` argument.

# Row-wise standard deviation over columns 2 to 3 (the two gender columns),
# giving one value per row of the table.
Standard_DeviationG <- apply(ess_gender_gt[, 2:3], MARGIN = 1, FUN = sd, na.rm = TRUE)

# Append the SD column, then rename all columns by position.
ess_gender_gt <- cbind(ess_gender_gt, Standard_DeviationG)
names(ess_gender_gt) <- c("cntry", "Female", "Male", "Standard_deviation")

So the NAs are here simply because there are no responses for that category. This was verified by opening the ess_clean data frame and filtering for the combination where an NA appears. It is a manual check rather than one done in code, carried out directly after names were assigned to the ESS data portal variables.

# Presentation table: income-band shares per country, shown as percentages.
ess_income_gt %>%
  gt(rowname_col = "row", groupname_col = "group") %>%
  tab_header(
    title = md("**Income Summary Statistics for 4 European Countries**"),
    subtitle = md("*Finland, Ireland, Portugal, and Slovenia*")
  ) %>%
  cols_label(
    cntry = md("**Country**"),
    Bottom_10 = md("**Bottom 10%**"),
    `11_to_50` = md("**11% to 50%**"),
    `51_to_90` = md("**51% to 90%**"),
    Top_10 = md("**Top 10%**"),
    standard_deviation = md("**Standard Deviation**")
  ) %>%
  tab_options(
    # thicker outer borders
    table.border.top.width = 4,
    table.border.bottom.width = 4,
    column_labels.border.bottom.width = 2,
    heading.title.font.size = px(18),
    heading.subtitle.font.size = px(12),
    source_notes.font.size = px(10),
    # extra vertical padding so the rows are not cramped
    data_row.padding = px(16)
  ) %>%
  fmt_percent(
    columns = c(Bottom_10, `11_to_50`, `51_to_90`, Top_10, standard_deviation)
  ) %>%
  cols_align(align = "center") %>%
  # show full country names instead of the two-letter codes
  text_case_match(
    "FI" ~ "Finland",
    "IE" ~ "Ireland",
    "PT" ~ "Portugal",
    "SI" ~ "Slovenia"
  ) %>%
  tab_source_note(source_note = md("*Data: ESS*"))

Country	Bottom 10%	11% to 50%	51% to 90%	Top 10%	Standard Deviation
Income Summary Statistics for 4 European Countries
Finland, Ireland, Portugal, and Slovenia
Finland	8.63%	38.11%	45.53%	7.74%	19.66%
Ireland	19.61%	53.05%	24.38%	2.97%	20.83%
Portugal	11.75%	51.35%	33.24%	3.66%	21.55%
Slovenia	11.59%	51.59%	32.21%	4.62%	21.25%
Data: ESS

# Presentation table: alcohol-consumption shares per country, as percentages.
ess_alc_gt %>%
  gt(rowname_col = "row", groupname_col = "group") %>%
  tab_header(
    title = md("**Alcohol Consumption Summary Statistics of 4 European Countries**"),
    subtitle = md("*Finland, Ireland, Portugal, and Slovenia*")
  ) %>%
  cols_label(
    cntry = md("**Country**"),
    Never = md("**Never**"),
    Monthly = md("**Monthly**"),
    Weekly = md("**Weekly**"),
    Everyday = md("**Everyday**"),
    standard_deviation = md("**Standard Deviation**")
  ) %>%
  tab_options(
    # thicker outer borders
    table.border.top.width = 4,
    table.border.bottom.width = 4,
    column_labels.border.bottom.width = 2,
    heading.title.font.size = px(18),
    heading.subtitle.font.size = px(12),
    source_notes.font.size = px(10),
    # extra vertical padding so the rows are not cramped
    data_row.padding = px(16)
  ) %>%
  fmt_percent(
    columns = c(Never, Monthly, Weekly, Everyday, standard_deviation)
  ) %>%
  cols_align(align = "center") %>%
  # show full country names instead of the two-letter codes
  text_case_match(
    "FI" ~ "Finland",
    "IE" ~ "Ireland",
    "PT" ~ "Portugal",
    "SI" ~ "Slovenia"
  ) %>%
  tab_source_note(source_note = md("*Data: ESS*"))

Country	Never	Monthly	Weekly	Everyday	Standard Deviation
Alcohol Consumption Summary Statistics of 4 European Countries
Finland, Ireland, Portugal, and Slovenia
Finland	12.13%	51.86%	34.40%	1.62%	22.52%
Ireland	25.86%	30.15%	41.23%	2.76%	16.18%
Portugal	28.61%	26.20%	23.03%	22.16%	2.97%
Slovenia	19.69%	40.72%	32.41%	7.18%	14.69%
Data: ESS

# Presentation table: age-band shares per country, shown as percentages.
ess_age_gt %>%
  gt(rowname_col = "row", groupname_col = "group") %>%
  tab_header(
    title = md("**Age Summary Statistics for 4 European Countries**"),
    subtitle = md("*Finland, Ireland, Portugal, and Slovenia*")
  ) %>%
  cols_label(
    cntry = md("**Country**"),
    "18" = md("**18-29**"),
    "30" = md("**30-39**"),
    "40" = md("**40-49**"),
    "50" = md("**50 and Older**"),
    standard_deviation = md("**Standard Deviation**")
  ) %>%
  tab_options(
    # thicker outer borders
    table.border.top.width = 4,
    table.border.bottom.width = 4,
    column_labels.border.bottom.width = 2,
    heading.title.font.size = px(18),
    heading.subtitle.font.size = px(12),
    source_notes.font.size = px(10),
    # extra vertical padding so the rows are not cramped
    data_row.padding = px(16)
  ) %>%
  fmt_percent(columns = c("18", "30", "40", "50", standard_deviation)) %>%
  cols_align(align = "center") %>%
  # show full country names instead of the two-letter codes
  text_case_match(
    "FI" ~ "Finland",
    "IE" ~ "Ireland",
    "PT" ~ "Portugal",
    "SI" ~ "Slovenia"
  ) %>%
  tab_source_note(source_note = md("*Data: ESS*"))

Country	18-29	30-39	40-49	50 and Older	Standard Deviation
Age Summary Statistics for 4 European Countries
Finland, Ireland, Portugal, and Slovenia
Finland	13.96%	13.75%	14.90%	57.40%	21.60%
Ireland	13.30%	18.81%	16.75%	51.14%	17.57%
Portugal	11.37%	16.18%	14.74%	57.71%	21.90%
Slovenia	14.77%	14.97%	15.18%	55.08%	20.05%
Data: ESS

# Presentation table: gender shares per country, shown as percentages.
ess_gender_gt %>%
  gt(rowname_col = "row", groupname_col = "group") %>%
  tab_header(
    title = md("**Gender Summary Statistics of 4 European Countries**"),
    subtitle = md("*Finland, Ireland, Portugal, and Slovenia*")
  ) %>%
  cols_label(
    cntry = md("**Country**"),
    Female = md("**Female**"),
    Male = md("**Male**"),
    Standard_deviation = md("**Standard Deviation**")
  ) %>%
  tab_options(
    # thicker outer borders
    table.border.top.width = 4,
    table.border.bottom.width = 4,
    column_labels.border.bottom.width = 2,
    heading.title.font.size = px(18),
    heading.subtitle.font.size = px(12),
    source_notes.font.size = px(10),
    # extra vertical padding so the rows are not cramped
    data_row.padding = px(16)
  ) %>%
  fmt_percent(columns = c(Female, Male, Standard_deviation)) %>%
  cols_align(align = "center") %>%
  # show full country names instead of the two-letter codes
  text_case_match(
    "FI" ~ "Finland",
    "IE" ~ "Ireland",
    "PT" ~ "Portugal",
    "SI" ~ "Slovenia"
  ) %>%
  tab_source_note(source_note = md("*Data: ESS*"))

Country	Female	Male	Standard Deviation
Gender Summary Statistics of 4 European Countries
Finland, Ireland, Portugal, and Slovenia
Finland	50.29%	49.71%	0.41%
Ireland	54.05%	45.95%	5.73%
Portugal	54.24%	45.76%	5.99%
Slovenia	54.77%	45.23%	6.74%
Data: ESS

# Facet-strip labels mapping the ESS country codes to country names.
# This object keeps the name `labeller` because it is referenced by that name
# further down (facet_wrap(..., labeller = labeller)). The constructor is
# therefore namespace-qualified: once this line has run, the global `labeller`
# shadows ggplot2::labeller(), and an unqualified call would fail when the
# chunk is re-run in the same session.
labeller <- ggplot2::labeller(
  cntry = c(
    "FI" = "Finland",
    "IE" = "Ireland",
    "PT" = "Portugal",
    "SI" = "Slovenia"
  )
)

# Total share per income band and country, used as plotting data.
ess_income_plot <- ess_income %>%
  summarise(prop = sum(prop, na.rm = TRUE))

## `summarise()` has grouped output by 'income'. You can override using the
## `.groups` argument.

# Bar chart of the income-band shares.
# NOTE(review): `prop` holds one within-country share per income band and
# country, and geom_col() stacks those rows, so each bar's height is the sum
# over the four countries (hence the y-limit above 100%) - confirm intended.
income_plot <- ess_income_plot %>%
  # The plot-level `color = income` mapping was dropped: geom_col() sets a
  # constant outline colour, so the mapping never had any effect.
  ggplot(aes(x = income, y = prop, fill = income)) +
  geom_col(color = "darkgrey") +
  theme_minimal() +
  # Single fill scale. Previously option "E" was added and then immediately
  # replaced by option "G", which only produced a
  # "Scale for fill is already present" message.
  scale_fill_viridis_d(option = "G") +
  scale_y_continuous(labels = scales::percent, limits = c(0, 2.5)) +
  labs(
    title = "Income plot",
    y = "Percentage of Responses",
    x = "Income",
    fill = "Income"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

income_plot

# Total share per consumption level and country, used as plotting data.
ess_alc_plot <- ess_alc %>%
  summarise(prop = sum(prop, na.rm = TRUE))

## `summarise()` has grouped output by 'alc_consumption'. You can override using
## the `.groups` argument.

# Bar chart of the alcohol-consumption shares.
# NOTE(review): `prop` holds one within-country share per consumption level
# and country, and geom_col() stacks those rows, so each bar's height is the
# sum over the four countries (hence the y-limit above 100%) - confirm intended.
alc_plot <- ess_alc_plot %>%
  # The plot-level `color = alc_consumption` mapping was dropped: geom_col()
  # sets a constant outline colour, so the mapping never had any effect.
  ggplot(aes(x = alc_consumption, y = prop, fill = alc_consumption)) +
  geom_col(color = "darkgrey") +
  theme_minimal() +
  # Single fill scale. Previously option "E" was added and then immediately
  # replaced by option "G", which only produced a
  # "Scale for fill is already present" message.
  scale_fill_viridis_d(option = "G") +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1.5)) +
  labs(
    title = "Alcohol Consumption plot",
    y = "Percentage of Responses",
    x = "Alcohol Consumption",
    fill = "Alcohol Consumption Levels"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

alc_plot

# Total share per age band and country, used as plotting data.
ess_age_plot <- ess_age %>%
  summarise(prop = sum(prop, na.rm = TRUE))

## `summarise()` has grouped output by 'age'. You can override using the `.groups`
## argument.

# Bar chart of the age-band shares.
# NOTE(review): `prop` holds one within-country share per age band and
# country, and geom_col() stacks those rows, so each bar's height is the sum
# over the four countries (hence the y-limit above 100%) - confirm intended.
age_plot <- ess_age_plot %>%
  ggplot(aes(x = age, y = prop, fill = age)) +
  geom_col(color = "darkgrey") +
  theme_minimal() +
  # Single fill scale. Previously option "E" was added and then immediately
  # replaced by option "G", which only produced a
  # "Scale for fill is already present" message.
  scale_fill_viridis_d(option = "G") +
  scale_y_continuous(labels = scales::percent, limits = c(0, 3)) +
  labs(
    title = "Age plot",
    y = "Percentage of Responses",
    x = "Age",
    fill = "Age"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

age_plot

# Total share per gender and country, used as plotting data.
ess_gender_plot <- ess_gender %>%
  summarise(prop = sum(prop, na.rm = TRUE))

## `summarise()` has grouped output by 'gender'. You can override using the
## `.groups` argument.

# Bar chart of the gender shares; the bars stack the per-country rows.
gender_plot <- ggplot(
  data = ess_gender_plot,
  mapping = aes(x = gender, y = prop, fill = gender)
) +
  geom_col(color = "darkgrey") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_viridis_d(option = "G") +
  scale_y_continuous(labels = scales::percent, limits = c(0, 2.5)) +
  labs(
    title = "Gender plot",
    y = "Percentage of Responses",
    x = "Gender",
    fill = "Gender"
  )
gender_plot

# Income x alcohol view of the per-country shares, grouped by income band,
# consumption level and country. cntry is selected explicitly (as the
# gender/age cross-tab blocks further down already do) instead of relying on
# dplyr re-adding the grouping column with an "Adding missing grouping
# variables" message. The resulting columns and their order
# (cntry, income, alc_consumption, n, pct, prop) are unchanged.
ess_income_alc <- ess_countries %>%
  select(cntry, income, alc_consumption, n, pct, prop) %>%
  group_by(income, alc_consumption, cntry)

# Total share per income band, consumption level and country, then one
# column per (income band, country) pair; rows are the consumption levels.
ess_income_alc_gt <- ess_income_alc %>%
  summarise(prop = sum(prop, na.rm = TRUE)) %>%
  pivot_wider(names_from = c(income, cntry), values_from = prop)

## `summarise()` has grouped output by 'income', 'alc_consumption'. You can
## override using the `.groups` argument.

# Rename the pivoted columns by position to <income band>_<country code>.
# NOTE(review): this depends on the column order produced by pivot_wider();
# re-check it if the underlying data change.
colnames(ess_income_alc_gt) <- c(
  "alc_consumption",
  "bottom_10_FI", "bottom_10_IE", "bottom_10_PT", "bottom_10_SI",
  "elev_FI", "elev_IE", "elev_PT", "elev_SI",
  "fif_FI", "fif_IE", "fif_PT", "fif_SI",
  "top_FI", "top_IE", "top_PT", "top_SI"
)

# Replace NA with 0 in the listed columns.
ess_income_alc_gt <- ess_income_alc_gt %>%
  mutate(
    across(
      c(bottom_10_FI, top_FI, top_IE, top_PT, top_SI),
      ~ replace_na(.x, 0)
    )
  )

# Total percentage per income band, consumption level and country,
# used as plotting data.
ess_income_alc_plot <- ess_income_alc %>%
  summarise(pct = sum(pct, na.rm = TRUE))

## `summarise()` has grouped output by 'income', 'alc_consumption'. You can
## override using the `.groups` argument.

# Stacked bars of alcohol consumption within each income band, all four
# countries together (rows with missing values are dropped first).
countries_plot <- ess_income_alc_plot %>%
  filter(!is.na(alc_consumption), !is.na(income), !is.na(pct)) %>%
  ggplot(mapping = aes(x = income, y = pct, fill = alc_consumption)) +
  geom_bar(stat = "identity", position = "stack", alpha = 0.9) +
  labs(
    title = "Relationship between Income and Alcohol",
    x = "Income",
    y = "Percentage",
    fill = "Alcohol Consumption Levels"
  ) +
  theme_minimal() +
  scale_fill_viridis_d(option = "D") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

countries_plot

# Total percentage per income band, consumption level and country,
# used as plotting data for the faceted version of the chart.
ess_income_alc_plot <- ess_income_alc %>%
  summarise(pct = sum(pct, na.rm = TRUE))

## `summarise()` has grouped output by 'income', 'alc_consumption'. You can
## override using the `.groups` argument.

# Same stacked chart as before, but with one panel per country; the global
# `labeller` object supplies the panel titles.
countries_plot <- ess_income_alc_plot %>%
  filter(!is.na(alc_consumption), !is.na(income), !is.na(pct)) %>%
  ggplot(mapping = aes(x = income, y = pct, fill = alc_consumption)) +
  geom_bar(
    stat = "identity",
    position = "stack",
    alpha = 0.9,
    color = "white"
  ) +
  labs(
    title = "Relationship between Income and Alcohol in 4 European Countries",
    x = "Income",
    y = "Percentage",
    fill = "Alcohol Consumption Levels"
  ) +
  theme_minimal() +
  scale_fill_viridis_d(option = "D") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~cntry, labeller = labeller)

countries_plot

# Gender x alcohol view of the per-country shares, grouped by gender,
# consumption level and country.
ess_gender_alc <- ess_countries %>%
  select(all_of(c("gender", "alc_consumption", "n", "pct", "cntry", "prop"))) %>%
  group_by(gender, alc_consumption, cntry)


# Total share per gender, consumption level and country, then one column per
# (country, gender) pair; rows are the consumption levels.
ess_gender_alc_gt <- ess_gender_alc %>%
  summarise(prop = sum(prop, na.rm = TRUE)) %>%
  pivot_wider(names_from = c(cntry, gender), values_from = prop)

## `summarise()` has grouped output by 'gender', 'alc_consumption'. You can
## override using the `.groups` argument.

# Age x alcohol view of the per-country shares, grouped by age band,
# consumption level and country.
ess_age_alc <- ess_countries %>%
  select(all_of(c("alc_consumption", "age", "n", "pct", "cntry", "prop"))) %>%
  group_by(age, alc_consumption, cntry)

# Total share per age band, consumption level and country, then one column
# per (country, age band) pair; rows are the consumption levels.
ess_age_alc_gt <- ess_age_alc %>%
  summarise(prop = sum(prop, na.rm = TRUE)) %>%
  pivot_wider(names_from = c(cntry, age), values_from = prop)

## `summarise()` has grouped output by 'age', 'alc_consumption'. You can override
## using the `.groups` argument.

# Rename the pivoted columns by position to <country code><lower age bound>.
# NOTE(review): this depends on the column order produced by pivot_wider();
# re-check it if the underlying data change.
colnames(ess_age_alc_gt) <- c(
  "alc_consumption",
  "FI18", "IE18", "PT18", "SI18",
  "FI30", "IE30", "PT30", "SI30",
  "FI40", "IE40", "PT40", "SI40",
  "FI50", "IE50", "PT50", "SI50"
)

# Replace NA with 0 in the listed columns.
ess_age_alc_gt <- ess_age_alc_gt %>%
  mutate(
    across(
      c(FI18, SI18, FI30, IE30, FI40),
      ~ replace_na(.x, 0)
    )
  )

ess_age_alc_gt

## # A tibble: 4 × 17
## # Groups:   alc_consumption [4]
##   alc_consumption   FI18    IE18     PT18    SI18   FI30    IE30   PT30    SI30
##   <fct>            <dbl>   <dbl>    <dbl>   <dbl>  <dbl>   <dbl>  <dbl>   <dbl>
## 1 Never           0.0120 0.0233  0.0212   0.0133  0.0105 0.0350  0.0318 0.0174 
## 2 Monthly         0.0988 0.0456  0.0530   0.0872  0.0852 0.0800  0.0636 0.0790 
## 3 Weekly          0.0288 0.0625  0.0385   0.0462  0.0418 0.0715  0.0520 0.0513 
## 4 Everyday        0      0.00159 0.000963 0.00103 0      0.00159 0.0145 0.00205
## # ℹ 8 more variables: FI40 <dbl>, IE40 <dbl>, PT40 <dbl>, SI40 <dbl>,
## #   FI50 <dbl>, IE50 <dbl>, PT50 <dbl>, SI50 <dbl>

# Presentation table: alcohol consumption (rows) by income band and country
# (columns), shown as percentages with one spanner per income band.
ess_income_alc_gt %>%
  gt(rowname_col = "row", groupname_col = "group") %>%
  tab_header(
    title = md("**Income and Alcohol Consumption table for 4 European Countries**"),
    subtitle = md("*Finland, Ireland, Portugal, and Slovenia*")
  ) %>%
  tab_options(
    # thicker outer borders
    table.border.top.width = 4,
    table.border.bottom.width = 4,
    column_labels.border.bottom.width = 2,
    heading.title.font.size = px(18),
    heading.subtitle.font.size = px(12),
    source_notes.font.size = px(10),
    # extra vertical padding so the rows are not cramped
    data_row.padding = px(16)
  ) %>%
  cols_align(align = "center") %>%
  tab_source_note(source_note = md("*Data: ESS*")) %>%
  fmt_percent(
    columns = c(
      "bottom_10_FI", "bottom_10_IE", "bottom_10_PT", "bottom_10_SI",
      "elev_FI", "elev_IE", "elev_PT", "elev_SI",
      "fif_FI", "fif_IE", "fif_PT", "fif_SI",
      "top_FI", "top_IE", "top_PT", "top_SI"
    )
  ) %>%
  tab_spanner(
    label = md("**Bottom 10%**"),
    columns = c(bottom_10_FI, bottom_10_IE, bottom_10_PT, bottom_10_SI)
  ) %>%
  tab_spanner(
    label = md("**11% to 50%**"),
    columns = c(elev_FI, elev_IE, elev_PT, elev_SI)
  ) %>%
  tab_spanner(
    label = md("**51% to 90%**"),
    columns = c(fif_FI, fif_IE, fif_PT, fif_SI)
  ) %>%
  tab_spanner(
    label = md("**Top 10%**"),
    columns = c(top_FI, top_IE, top_PT, top_SI)
  ) %>%
  cols_label(
    alc_consumption = md("**Alcohol Consumption**"),
    bottom_10_FI = md("*Finland*"),
    bottom_10_IE = md("*Ireland*"),
    bottom_10_PT = md("*Portugal*"),
    bottom_10_SI = md("*Slovenia*"),
    elev_FI = md("*Finland*"),
    elev_IE = md("*Ireland*"),
    elev_PT = md("*Portugal*"),
    elev_SI = md("*Slovenia*"),
    fif_FI = md("*Finland*"),
    fif_IE = md("*Ireland*"),
    fif_PT = md("*Portugal*"),
    fif_SI = md("*Slovenia*"),
    top_FI = md("*Finland*"),
    top_IE = md("*Ireland*"),
    top_PT = md("*Portugal*"),
    top_SI = md("*Slovenia*")
  )

Alcohol Consumption	Bottom 10%				11% to 50%				51% to 90%				Top 10%
Income and Alcohol Consumption table for 4 European Countries
Finland, Ireland, Portugal, and Slovenia
Alcohol Consumption	Finland	Ireland	Portugal	Slovenia	Finland	Ireland	Portugal	Slovenia	Finland	Ireland	Portugal	Slovenia	Finland	Ireland	Portugal	Slovenia
Never	1.93%	6.31%	5.68%	3.79%	6.59%	15.10%	15.90%	11.49%	3.19%	4.08%	6.65%	4.10%	0.42%	0.37%	0.39%	0.31%
Monthly	4.91%	5.72%	2.31%	3.49%	20.91%	15.69%	12.52%	20.41%	22.37%	7.84%	9.73%	14.36%	3.66%	0.90%	1.64%	2.46%
Weekly	1.67%	7.00%	1.64%	3.08%	10.04%	21.09%	10.40%	15.38%	19.18%	11.61%	9.63%	12.31%	3.50%	1.54%	1.35%	1.64%
Everyday	0.10%	0.58%	2.12%	1.23%	0.58%	1.17%	12.52%	4.31%	0.78%	0.85%	7.23%	1.44%	0.16%	0.16%	0.29%	0.21%
Data: ESS

# Presentation table: alcohol consumption (rows) by age band and country
# (columns), shown as percentages with one spanner per age band.
ess_age_alc_gt %>%
  gt(rowname_col = "row", groupname_col = "group") %>%
  tab_header(
    title = md("**Comparison table for Alcohol Consumption and Age**"),
    subtitle = md("*In four European Countries: Finland, Ireland, Portugal, and Slovenia*")
  ) %>%
  tab_options(
    # thicker outer borders
    table.border.top.width = 4,
    table.border.bottom.width = 4,
    column_labels.border.bottom.width = 2,
    heading.title.font.size = px(18),
    heading.subtitle.font.size = px(12),
    source_notes.font.size = px(10),
    # extra vertical padding so the rows are not cramped
    data_row.padding = px(16)
  ) %>%
  tab_source_note(source_note = md("*Data: ESS*")) %>%
  fmt_percent(
    columns = c(
      "FI18", "IE18", "PT18", "SI18",
      "FI30", "IE30", "PT30", "SI30",
      "FI40", "IE40", "PT40", "SI40",
      "FI50", "IE50", "PT50", "SI50"
    )
  ) %>%
  tab_spanner(
    label = md("**18 to 29 years old**"),
    columns = c(FI18, IE18, PT18, SI18)
  ) %>%
  tab_spanner(
    label = md("**30 to 39 years old**"),
    columns = c(FI30, IE30, PT30, SI30)
  ) %>%
  tab_spanner(
    label = md("**40 to 49 years old**"),
    columns = c(FI40, IE40, PT40, SI40)
  ) %>%
  tab_spanner(
    label = md("**50 years and older**"),
    columns = c(FI50, IE50, PT50, SI50)
  ) %>%
  cols_label(
    alc_consumption = md("**Alcohol Consumption**"),
    FI18 = md("*Finland*"),
    IE18 = md("*Ireland*"),
    PT18 = md("*Portugal*"),
    SI18 = md("*Slovenia*"),
    FI30 = md("*Finland*"),
    IE30 = md("*Ireland*"),
    PT30 = md("*Portugal*"),
    SI30 = md("*Slovenia*"),
    FI40 = md("*Finland*"),
    IE40 = md("*Ireland*"),
    PT40 = md("*Portugal*"),
    SI40 = md("*Slovenia*"),
    FI50 = md("*Finland*"),
    IE50 = md("*Ireland*"),
    PT50 = md("*Portugal*"),
    SI50 = md("*Slovenia*")
  )

Alcohol Consumption	18 to 29 years old				30 to 39 years old				40 to 49 years old				50 years and older
Comparison table for Alcohol Consumption and Age
In four European Countries: Finland, Ireland, Portugal, and Slovenia
Alcohol Consumption	Finland	Ireland	Portugal	Slovenia	Finland	Ireland	Portugal	Slovenia	Finland	Ireland	Portugal	Slovenia	Finland	Ireland	Portugal	Slovenia
Never	1.20%	2.33%	2.12%	1.33%	1.05%	3.50%	3.18%	1.74%	1.25%	3.39%	2.70%	2.36%	8.63%	16.64%	20.62%	14.26%
Monthly	9.88%	4.56%	5.30%	8.72%	8.52%	8.00%	6.36%	7.90%	7.42%	5.78%	5.11%	7.28%	26.03%	11.82%	9.44%	16.82%
Weekly	2.88%	6.25%	3.85%	4.62%	4.18%	7.15%	5.20%	5.13%	5.96%	7.10%	4.43%	4.92%	21.38%	20.72%	9.54%	17.74%
Everyday	0.00%	0.16%	0.10%	0.10%	0.00%	0.16%	1.45%	0.21%	0.26%	0.48%	2.50%	0.62%	1.36%	1.96%	18.11%	6.26%
Data: ESS

# Presentation table: alcohol consumption (rows) by gender and country
# (columns), shown as percentages with one spanner per gender.
ess_gender_alc_gt %>%
  gt(rowname_col = "row", groupname_col = "group") %>%
  tab_header(
    title = md("**Comparison table for Alcohol Consumption and Gender**"),
    subtitle = md("*In four European Countries: Finland, Ireland, Portugal, and Slovenia*")
  ) %>%
  tab_options(
    # thicker outer borders
    table.border.top.width = 4,
    table.border.bottom.width = 4,
    column_labels.border.bottom.width = 2,
    heading.title.font.size = px(18),
    heading.subtitle.font.size = px(12),
    source_notes.font.size = px(10),
    # extra vertical padding so the rows are not cramped
    data_row.padding = px(16)
  ) %>%
  tab_source_note(source_note = md("*Data: ESS*")) %>%
  fmt_percent(
    columns = c(
      "FI_Female", "IE_Female", "PT_Female", "SI_Female",
      "FI_Male", "IE_Male", "PT_Male", "SI_Male"
    )
  ) %>%
  tab_spanner(
    label = md("**Female**"),
    columns = c(FI_Female, IE_Female, PT_Female, SI_Female)
  ) %>%
  tab_spanner(
    label = md("**Male**"),
    columns = c(FI_Male, IE_Male, PT_Male, SI_Male)
  ) %>%
  cols_label(
    alc_consumption = md("**Alcohol Consumption**"),
    FI_Female = md("*Finland*"),
    IE_Female = md("*Ireland*"),
    PT_Female = md("*Portugal*"),
    SI_Female = md("*Slovenia*"),
    FI_Male = md("*Finland*"),
    IE_Male = md("*Ireland*"),
    PT_Male = md("*Portugal*"),
    SI_Male = md("*Slovenia*")
  )

Alcohol Consumption	Female				Male
Comparison table for Alcohol Consumption and Gender
In four European Countries: Finland, Ireland, Portugal, and Slovenia
Alcohol Consumption	Finland	Ireland	Portugal	Slovenia	Finland	Ireland	Portugal	Slovenia
Never	7.27%	15.37%	22.64%	13.44%	4.86%	10.49%	5.97%	6.26%
Monthly	30.37%	18.87%	15.99%	27.79%	21.48%	11.29%	10.21%	12.92%
Weekly	12.28%	18.92%	9.06%	12.41%	22.11%	22.31%	13.97%	20.00%
Everyday	0.37%	0.90%	6.55%	1.13%	1.25%	1.85%	15.61%	6.05%
Data: ESS

# Total share per gender, consumption level and country, used as plotting data.
ess_gender_alc_plot <- ess_gender_alc %>%
  summarise(prop = sum(prop, na.rm = TRUE))

## `summarise()` has grouped output by 'gender', 'alc_consumption'. You can
## override using the `.groups` argument.

# Stacked bars of alcohol consumption within each gender (rows with missing
# values are dropped first). The expression is auto-printed.
# Previously the data frame was piped into ggplot() while `data =` was also
# given by name, so the piped object silently ended up in `...` and was
# ignored. The data now reach ggplot() through one path only.
ess_gender_alc_plot %>%
  filter(!is.na(alc_consumption), !is.na(gender), !is.na(prop)) %>%
  ggplot(mapping = aes(x = gender, y = prop, fill = alc_consumption)) +
  geom_bar(position = "stack", stat = "identity", alpha = 0.9) +
  labs(
    title = "Relationship between Gender and Alcohol",
    x = "Gender",
    y = "Percentage",
    fill = "Alcohol Consumption Levels"
  ) +
  theme_minimal() +
  scale_fill_viridis_d(option = "G") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 2.5))

# Total share per age band, consumption level and country, used as plotting data.
ess_age_alc_plot <- ess_age_alc %>%
  summarise(prop = sum(prop, na.rm = TRUE))

## `summarise()` has grouped output by 'age', 'alc_consumption'. You can override
## using the `.groups` argument.

# Stacked bars of alcohol consumption within each age band (rows with missing
# values are dropped first). The expression is auto-printed.
# Previously the data frame was piped into ggplot() while `data =` was also
# given by name, so the piped object silently ended up in `...` and was
# ignored. The data now reach ggplot() through one path only.
ess_age_alc_plot %>%
  filter(!is.na(alc_consumption), !is.na(age), !is.na(prop)) %>%
  ggplot(mapping = aes(x = age, y = prop, fill = alc_consumption)) +
  geom_bar(position = "stack", stat = "identity", alpha = 0.9) +
  labs(
    title = "Relationship between Age and Alcohol",
    x = "Age",
    y = "Percentage",
    fill = "Alcohol Consumption Levels"
  ) +
  theme_minimal() +
  scale_fill_viridis_d(option = "G") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 3))

Initial Analysis

2025-03-02