library("tidyverse")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("gt")
library("gapminder")
library("srvyr")
## 
## Attaching package: 'srvyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
library("srvyrexploR")
library("fst")
library("ggridges")
library("plotly")
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library("DT")
library("gridExtra")
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library("patchwork")
library("reshape2")
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
library("finalfit")
library("sjPlot")
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
library("plotly")
library("kableExtra")
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:srvyr':
## 
##     group_rows
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library("broom")
library("modelsummary")
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
library("ggrepel")
library(geomtextpath)
library("interactions")
library("jtools")
# Load complete ESS data (490,555 observations)
ess <- read_fst("All-ESS-Data.fst")

###Methods and data tables and plots section

#Set up for table

ess_clean <- ess %>%
  filter(cntry %in% c("SI", "FI", "IE", "PT")) %>%
  mutate(alc_consumption = case_when(
    alcfreq %in% c(7) ~ "Never", 
     alcfreq %in% c(4:6) ~ "Monthly",
      alcfreq %in% c(2:3) ~ "Weekly",
      alcfreq %in% c(1) ~ "Everyday",
      TRUE ~ NA_character_
    ),
    alc_consumption = factor(alc_consumption, levels = c("Never", "Monthly", "Weekly", "Everyday")),
    
    income = case_when(
      hinctnta %in% c(1) ~ "Bottom 10%", 
      hinctnta %in% c(2:5) ~ "11% to 50%",
      hinctnta %in% c(6:9) ~ "51% to 90%",
      hinctnta %in% c(10) ~ "Top 10%",
      TRUE ~ NA_character_
    ),
    income = factor(income, levels = c("Bottom 10%", "11% to 50%", "51% to 90%", "Top 10%")),
    
    age = case_when(
      agea %in% c(18:29) ~ "18-29",
      agea %in% c(30:39) ~ "30-39",
      agea %in% c(40:49) ~ "40-49",
      agea %in% c(50:59) ~ "50-59",
      agea %in% c(60:69) ~ "60-69",
      agea %in% c(70:79) ~ "70-79",
      agea >= 80 ~ "80+",
      TRUE ~ NA_character_
    ), 
    age = factor(age, levels = c("18-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+")),
    
    gender = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      TRUE ~ NA_character_
    ))

ess_clean <- ess_clean %>% dplyr::select(alc_consumption, income, age, gender, cntry) %>% filter(cntry == "FI" | cntry == "IE" | cntry == "PT" | cntry == "SI", !is.na(alc_consumption), !is.na(income), !is.na(age), !is.na(gender), !is.na(cntry))
#Set up for all non regression things
ess_clean_regression <- ess %>%
  filter(cntry %in% c("SI", "FI", "IE", "PT")) %>%
  mutate(
    alc_consumption = case_when(
      alcfreq > 7 ~ NA_real_,
      TRUE ~ as.numeric(alcfreq)
    ), 
    
    income = case_when(
      hinctnta %in% c(1) ~ "Bottom 10%", 
      hinctnta %in% c(2:5) ~ "11% to 50%",
      hinctnta %in% c(6:9) ~ "51% to 90%",
      hinctnta %in% c(10) ~ "Top 10%",
      TRUE ~ NA_character_
    ),
    income = factor(income, levels = c("Bottom 10%", "11% to 50%", "51% to 90%", "Top 10%")),
    
    age = case_when(
      agea %in% c(18:29) ~ "18-29",
      agea %in% c(30:39) ~ "30-39",
      agea %in% c(40:49) ~ "40-49",
      agea %in% c(50:59) ~ "50-59",
      agea %in% c(60:69) ~ "60-69",
      agea %in% c(70:79) ~ "70-79",
      agea >= 80 ~ "80+",
      TRUE ~ NA_character_
    ), 
    age = factor(age, levels = c("18-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+")),
    
    gender = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      TRUE ~ NA_character_
    ))

ess_clean_regression <- ess_clean_regression %>% dplyr::select(alc_consumption, income, age, gender, cntry) %>% filter(cntry == "FI" | cntry == "IE" | cntry == "PT" | cntry == "SI", !is.na(alc_consumption), !is.na(income), !is.na(age), !is.na(gender), !is.na(cntry))
#set up for regression things
ess_complete <- ess_clean %>%
  filter(!is.na(alc_consumption), !is.na(income), !is.na(age), !is.na(gender))

country_counts <- ess %>%
  filter(cntry %in% c("SI", "FI", "IE", "PT")) %>%
  count(cntry)

transformed_counts <- ess_clean %>%
  filter(!is.na(alc_consumption) | !is.na(income) | !is.na(age) | !is.na(gender)) %>%
  count(cntry)

complete_counts <- ess_clean %>%
  filter(!is.na(alc_consumption), !is.na(income), !is.na(age), !is.na(gender)) %>%
  count(cntry)
#code from fixed initial analysis rmarkdown that prof parker provided us for the correct cleaning.
complete_counts
##   cntry    n
## 1    FI 1913
## 2    IE 1893
## 3    PT 1038
## 4    SI  975
ess_income <- ess_complete %>%
  filter(!is.na(income)) %>%
  count(income, cntry) %>%
  group_by(cntry) %>%
  mutate(
    prop = n/sum(n),
    pct = round(100 * prop, 2)
  )

# Age 
ess_age <- ess_complete %>%
  filter(!is.na(age)) %>%
  count(age, cntry) %>%
  group_by(cntry) %>%
  mutate(
    prop = n/sum(n),
    pct = round(100 * prop, 2)
  )

# Alcohol
ess_alc <- ess_complete %>%
  filter(!is.na(alc_consumption)) %>%
  count(alc_consumption, cntry) %>%
  group_by(alc_consumption) %>%
  mutate(
    prop = n/sum(n),
    pct = round(100 * prop, 2) 
  )

# Gender
ess_gender <- ess_complete %>%
  filter(!is.na(gender)) %>%
  count(gender, cntry) %>%
  group_by(cntry) %>%
  mutate(
    prop = n/sum(n),
    pct = round(100 * prop, 2)
  )

# Income and alcohol consumption
ess_income_alc <- ess_complete %>%
  filter(!is.na(income), !is.na(alc_consumption)) %>%
  count(income, alc_consumption) %>%
  group_by(income) %>%
  mutate(
    prop = n/sum(n),
    pct = round(100 * prop, 2)
  )

labeller = ggplot2::labeller(cntry = 
    c("FI" = "Finland",
      "IE" = "Ireland",
      "PT" = "Portugal",
      "SI" = "Slovenia"))
#set ups for plots that show pct and counts.
ess_gender_sample <- ess_gender %>%
  dplyr::select(n, gender) %>%
  group_by(gender) %>%
  summarise(across(starts_with("n"), ~sum(., na.rm = TRUE)))
## Adding missing grouping variables: `cntry`
pivot_gender <- ess_gender %>%
  dplyr::select(gender, cntry, n, pct, prop) %>%
  group_by(cntry, gender) %>%
 summarise(Percent = sum(prop, na.rm=TRUE),
            Respondents = sum(n, na.rm = TRUE)) %>% 
  pivot_wider(names_from = cntry, values_from = c(Percent, Respondents)) 
## `summarise()` has grouped output by 'cntry'. You can override using the
## `.groups` argument.
pivot_gender <- cbind(ess_gender_sample, pivot_gender)

colnames(pivot_gender) <- c("Gender", "Sample_Size", "DROP", "FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT", "FI_N", "IE_N", "PT_N", "SI_N")

pivot_gender$DROP = NULL
#here I am finding the gender sample size, using pivot wider so it shows the sample size in numbers and pct, then using cbind to put it together
ess_income_sample <- ess_income %>%
  dplyr::select(n, income) %>%
  group_by(income) %>%
  summarise(across(starts_with("n"), ~sum(., na.rm = TRUE)))
## Adding missing grouping variables: `cntry`
pivot_income <- ess_income %>%
  dplyr::select(income, cntry, n, pct, prop) %>%
  group_by(cntry, income) %>%
  summarise(Percent = sum(prop, na.rm=TRUE),
            Respondents = sum(n, na.rm = TRUE)) %>% 
  pivot_wider(names_from = cntry, values_from = c(Percent, Respondents)) 
## `summarise()` has grouped output by 'cntry'. You can override using the
## `.groups` argument.
pivot_income <- cbind(ess_income_sample, pivot_income)

colnames(pivot_income) <- c("Income", "Sample_Size", "DROP", "FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT", "FI_N", "IE_N", "PT_N", "SI_N")

pivot_income$DROP = NULL
#here I am finding the income sample size, using pivot wider so it shows the sample size in numbers and pct, then using cbind to put it together
ess_alc_sample <- ess_alc %>%
  dplyr::select(n, alc_consumption) %>%
  group_by(alc_consumption) %>%
  summarise(across(starts_with("n"), ~sum(., na.rm = TRUE)))

pivot_alc <- ess_alc %>%
  dplyr::select(alc_consumption, cntry, n, pct, prop) %>%
  group_by(cntry, alc_consumption) %>%
  summarise(Percent = sum(prop, na.rm=TRUE),
            Respondents = sum(n, na.rm = TRUE)) %>% 
  pivot_wider(names_from = cntry, values_from = c(Percent, Respondents)) 
## `summarise()` has grouped output by 'cntry'. You can override using the
## `.groups` argument.
pivot_alc <- cbind(ess_alc_sample, pivot_alc)

colnames(pivot_alc) <- c("Alcohol_Consumption", "Sample_Size", "DROP", "FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT", "FI_N", "IE_N", "PT_N", "SI_N")

pivot_alc$DROP = NULL
#here I am finding the alcohol consumption sample size, using pivot wider so it shows the sample size in numbers and pct, then using cbind to put it together
ess_age_sample <- ess_age %>%
  dplyr::select(n, age) %>%
  group_by(age) %>%
  summarise(across(starts_with("n"), ~sum(., na.rm = TRUE)))
## Adding missing grouping variables: `cntry`
pivot_age <- ess_age %>%
  dplyr::select(age, cntry, n, pct, prop) %>%
  group_by(cntry, age) %>%
  summarise(Percent = sum(prop, na.rm=TRUE),
            Respondents = sum(n, na.rm = TRUE)) %>% 
  pivot_wider(names_from = cntry, values_from = c(Percent, Respondents)) 
## `summarise()` has grouped output by 'cntry'. You can override using the
## `.groups` argument.
pivot_age <- cbind(ess_age_sample, pivot_age)

colnames(pivot_age) <- c("Age", "Sample_Size", "DROP", "FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT", "FI_N", "IE_N", "PT_N", "SI_N")

pivot_age$DROP = NULL
#here I am finding the age sample size, using pivot wider so it shows the sample size in numbers and pct, then using cbind to put it together
binded <- bind_rows(
  pivot_alc %>%
    mutate(characteristic = "Alcohol Consumption",
           category = Alcohol_Consumption) %>%
    dplyr::select(-Alcohol_Consumption),
  
  pivot_income %>% 
    mutate(characteristic = "Income",
           category = Income) %>%
    dplyr::select(-Income),
  
  pivot_age %>%
    mutate(characteristic = "Age",
           category = Age) %>%
    dplyr::select(-Age),
  
  pivot_gender %>%
    mutate(characteristic = "Gender",
           category = Gender) %>%
    dplyr::select(-Gender))
#since I have all those cbinded tables, now I am using bind_rows to put them all on top of eachother to create one simple statistics table.

#Summary Table

Table1 <- binded %>%
  gt(
    groupname_col = "characteristic"
  ) %>%
  cols_move_to_start(category) %>%
  tab_header(
    title = md("**Table 1**"),
    subtitle = md("*Source: European Social Survey*")
  ) %>%
  tab_spanner(label = md("**Respondents**"), columns = c("FI_N", "IE_N", "PT_N", "SI_N")) %>%
  tab_spanner(label = md("**Percentage**"), columns = c("FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT")) %>%
  cols_label(
    category = md("**Category**"),
    Sample_Size = md("*Sample Size*"),
    FI_N = md("*Finland*"),
    IE_N = md("*Ireland*"),
    PT_N = md("*Portugal*"),
    SI_N = md("*Slovenia*"), 
    FI_PCT = md("*Finland*"),
    IE_PCT = md("*Ireland*"),
    PT_PCT = md("*Portugal*"),
    SI_PCT = md("*Slovenia*")
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_row_groups()
  ) %>%
  fmt_percent(columns = c("FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT")) %>%
   tab_options(
    table.border.top.width = 4, 
    table.border.bottom.width = 4,
    column_labels.border.bottom.width = 2,
    heading.title.font.size = px(18),
    heading.subtitle.font.size = px(14),
    data_row.padding = px(8)
  ) %>%
  cols_align("center") %>%
  tab_style(
    style = list(
      cell_fill(color = "#f5f5f5")
    ),
    locations = cells_body(rows = seq(1, 17, 2))
  ) %>%
  tab_source_note(source_note = md("*Total Sample Size: 5918*"))
Table1
Table 1
Source: European Social Survey
Category Sample Size
Percentage
Respondents
Finland Ireland Portugal Slovenia Finland Ireland Portugal Slovenia
Alcohol Consumption
Never 1212 19.14% 40.51% 24.50% 15.84% 232 491 297 192
Monthly 2232 44.44% 25.58% 12.19% 17.79% 992 571 272 397
Weekly 1992 33.03% 39.11% 12.00% 15.86% 658 779 239 316
Everyday 383 8.09% 13.58% 60.05% 18.28% 31 52 230 70
Income
Bottom 10% 771 8.63% 19.60% 11.75% 11.59% 165 371 122 113
11% to 50% 2769 38.11% 53.04% 51.35% 51.59% 729 1004 533 503
51% to 90% 1992 45.53% 24.41% 33.24% 32.21% 871 462 345 314
Top 10% 287 7.74% 2.96% 3.66% 4.62% 148 56 38 45
Age
18-29 780 13.96% 13.26% 11.37% 14.77% 267 251 118 144
30-39 932 13.75% 18.75% 16.18% 14.97% 263 355 168 146
40-49 902 14.90% 16.69% 14.74% 15.18% 285 316 153 148
50-59 1038 19.08% 17.12% 15.51% 19.28% 365 324 161 188
60-69 1141 22.43% 18.07% 18.02% 18.77% 429 342 187 183
70-79 697 9.88% 11.09% 16.96% 12.51% 189 210 176 122
80+ 329 6.01% 5.02% 7.23% 4.51% 115 95 75 44
Gender
Female 3084 50.29% 54.15% 54.24% 54.77% 962 1025 563 534
Male 2735 49.71% 45.85% 45.76% 45.23% 951 868 475 441
Total Sample Size: 5918
#here I am using that now one complete dataset to create the Table1 summary table. It is in the same order as the regression model.
#Table1 |> gtsave(filename = "Table1.png") #saving table as image so I can insert it into our report instead of screenshoting it.
Figure1 <- ggplot(data = ess_income_alc, mapping = aes(x = income, y = prop, fill = alc_consumption)) +
    geom_bar(position = "dodge", stat = "identity",alpha = 0.9, color = "white") +
labs(title = "Figure 1. Relationship between Income and Alcohol Consumption", subtitle = "Finland, Ireland, Portugal, and Slovenia", x = "Income", y = "Percentage", fill = "Alcohol Consumption Levels", caption = "Source: European Social Survey") +  theme_minimal() + 
  scale_fill_viridis_d(option = "E") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, 0.5), breaks = seq(0, 0.5, by = 0.1)) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
Figure1

#first descriptive plot. I wanted to compare income and alcohol consumption summary stats since it is our main relationship of interest.
#ggsave(Figure1, 
 #     filename = "Figure1.png",
  #     device = "png")

##Results

ess_1 <- ess_clean_regression  %>%
  summarize(
    median = median(alc_consumption, na.rm = TRUE),
    q25 = quantile(alc_consumption, 0.25, na.rm = TRUE),
    q75 = quantile(alc_consumption, 0.75, na.rm = TRUE),
    iqr = q75 - q25,
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    .groups = "drop"
  ) %>% 
  mutate_if(is.numeric, ~round(., 2))
#means calculated so i can put them on the figures!
ess_2 <- ess_clean_regression %>%
  group_by(income) %>%
  summarize(
    median = median(alc_consumption, na.rm = TRUE),
    q25 = quantile(alc_consumption, 0.25, na.rm = TRUE),
    q75 = quantile(alc_consumption, 0.75, na.rm = TRUE),
    iqr = q75 - q25,
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    .groups = "drop"
  ) %>% 
  mutate_if(is.numeric, ~round(., 2)) 
#means calculated so i can put them on the figures!
ess_3 <- ess_clean_regression %>%
  group_by(age) %>%
  summarize(
    median = median(alc_consumption, na.rm = TRUE),
    q25 = quantile(alc_consumption, 0.25, na.rm = TRUE),
    q75 = quantile(alc_consumption, 0.75, na.rm = TRUE),
    iqr = q75 - q25,
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    .groups = "drop"
  ) %>% 
  mutate_if(is.numeric, ~round(., 2)) 
#means calculated so i can put them on the figures!
ess_4 <- ess_clean_regression %>%
  group_by(gender) %>%
  summarize(
    median = median(alc_consumption, na.rm = TRUE),
    q25 = quantile(alc_consumption, 0.25, na.rm = TRUE),
    q75 = quantile(alc_consumption, 0.75, na.rm = TRUE),
    iqr = q75 - q25,
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    .groups = "drop"
  ) %>% 
  mutate_if(is.numeric, ~round(., 2)) 
#means calculated so i can put them on the figures!
Figure2 <- ggplot(ess_clean_regression, aes(x = alc_consumption)) +
  geom_histogram(fill = "#770737") +
  geom_vline(
    xintercept = mean(ess_clean_regression$alc_consumption, na.rm = TRUE), linetype = "dashed", 
    color = "black", 
    linewidth = 0.7
  ) +
  geom_textvline(label = "Mean: 4.36", xintercept = 4.36, vjust = 1.3, size = 3.5, text_only = TRUE) +
  labs(
    title = "Figure 2. Distribution of Alcohol Consumption",
    subtitle = "Finland, Ireland, Portugal, and Slovenia",
    x = "Alcohol Consumption from Everyday (1) to Never (7)",
    y = "Repondant Count", caption = "Source: European Social Survey"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  ) +
  scale_x_continuous(limits = c(0, 8), breaks = seq(0, 8, by = 1)) +
  scale_y_continuous(limits = c(0,1500), breaks = seq(0, 1300, by = 250))
Figure2
## Warning in geom_textvline(label = "Mean: 4.36", xintercept = 4.36, vjust = 1.3, : All aesthetics have length 1, but the data has 5819 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

#the above caluclated means really helped me with this plot and all the following plots. I really wanted to put the number on the plot since it was said in class that we cannot guess/estimate what the mean is, and what if I forget or stumble over my words. Now it is there directly on the plot and the viewers can see it. I also kept the same sort of theme for all the plots because I wanted there to be profesionnalism and unity between them. I didn't want it to look like they belonged in separate presentations. 
#ggsave(Figure2, 
 #      filename = "Figure2.png",
  #  device = "png")
setup1 <- ess_clean_regression %>%
  filter(!is.na(alc_consumption), !is.na(income)) %>%
  group_by(income) %>%
  summarize(
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    ci_lower = mean_alc - 1.96 * se_alc,
    ci_upper = mean_alc + 1.96 * se_alc,
    .groups = "drop"
  )

results_colors1 <- c("#647695", "#8A9CBC", "#B0C4E2", "#D6E4FF")

Figure3 <- ggplot(setup1, aes(y = income , x = mean_alc, fill = income)) +
  geom_col(alpha = 0.9, width = 0.7, color = "black", linewidth = 0.3) +
  geom_errorbarh(
    aes(xmin = ci_lower, xmax = ci_upper),
    height = 0.2,
    color = "black",
    linewidth = 0.7
  ) +  
  geom_text_repel(
    data = setup1,
    aes(label = case_when(
      income == "Bottom 10%" ~ "Mean: 4.84",
      income == "11% to 50%" ~ "Mean: 4.51",
      income == "51% to 90%" ~ "Mean: 4.03",
      income == "Top 10%" ~ "Mean: 3.86"
    )),
    nudge_x = 1,
    direction = "y",
    segment.size = 0,
    box.padding = 0.5,
    size = 3.5,
    fontface = "italic"
  ) +
  scale_fill_manual(values = results_colors1) +
  scale_x_continuous(limits = c(0, 7), expand = c(0, 0), breaks = seq(0, 7, by = 1)) +
  labs(
    title = "Figure 3. Alcohol Consumption Mean by Income in Europe",
    subtitle = "Finland, Ireland, Portugal, and Slovenia",
    y = NULL,
    x = "Alcohol Consumption from Everyday (1) to Never (7)",
    caption = "Source: European Social Survey"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.x = element_text(face = "bold"),
    axis.text.y = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
Figure3

#loving how the means look on the plot. Also how there is the confidence interval right on the plot. 
#ggsave(Figure3, 
 #      filename = "Figure3.png",
  #     device = "png")
setup2 <- ess_clean_regression %>%
  filter(!is.na(alc_consumption), !is.na(gender)) %>%
  group_by(gender) %>%
  summarize(
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    ci_lower = mean_alc - 1.96 * se_alc,
    ci_upper = mean_alc + 1.96 * se_alc,
    .groups = "drop"
  )

results_colors2 <- c("#6E260E", "#C04000") 

Figure4 <- ggplot(setup2, aes(y = gender, x = mean_alc, fill = gender)) +
  geom_col(alpha = 0.9, width = 0.7, color = "black", linewidth = 0.3) +
  geom_errorbarh(
    aes(xmin = ci_lower, xmax = ci_upper), #confidence interval showing
    height = 0.2,
    color = "black",
    linewidth = 0.7
  ) +
  geom_text_repel(
    data = setup2,
    aes(label = case_when(
      gender == "Female" ~ "Mean: 4.83",
      gender == "Male" ~ "Mean: 3.82"
    )),
    nudge_x = 1,
    direction = "y",
    segment.size = 0,
    box.padding = 0.5,
    size = 3.5,
    fontface = "italic"
  ) +
  scale_fill_manual(values = results_colors2) +
  scale_x_continuous(limits = c(0, 7), expand = c(0, 0), breaks = seq(0, 7, by = 1)) +
  labs(
    title = "Figure 4. Alcohol Consumption Mean by Gender in Europe",
    subtitle = "Finland, Ireland, Portugal, and Slovenia",
    y = NULL,
    x = "Alcohol Consumption from Everyday (1) to Never (7)",
    caption = "Source: European Social Survey"
  ) + theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.x = element_text(face = "bold"),
    axis.text.y = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
Figure4

#this was the biggest shocker plot, with men drinking more than women by a full unit. 
#ggsave(Figure4, 
   #    filename = "Figure4.png",
  #     device = "png")
setup3 <- ess_clean_regression %>%
  filter(!is.na(alc_consumption), !is.na(age)) %>%
  group_by(age) %>%
  summarize(
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    ci_lower = mean_alc - 1.96 * se_alc,
    ci_upper = mean_alc + 1.96 * se_alc,
    .groups = "drop"
  )

results_colors3 <- c("#355E3B", "#2E8B57", "#00A36C", "#50C878", "#90EE90", "#98FB98", "#C1E1C1")

Figure5 <- ggplot(setup3, aes(y = age, x = mean_alc, fill = age)) +
  geom_col(alpha = 0.9, width = 0.7, color = "black", linewidth = 0.3) +
  geom_errorbarh(
    aes(xmin = ci_lower, xmax = ci_upper),
    height = 0.2,
    color = "black",
    linewidth = 0.7
  ) +
  geom_text_repel(
    data = setup3,
    aes(label = case_when(
      age == "18-29" ~ "Mean: 4.46",
      age == "30-39" ~ "Mean: 4.38",
      age == "40-49" ~ "Mean: 4.21",
      age == "50-59" ~ "Mean: 4.04",
      age == "60-69" ~ "Mean: 4.23",
      age == "70-79" ~ "Mean: 4.64",
      age == "80+" ~ "Mean: 5.25"
    )),
    nudge_x = 1,
    direction = "y",
    segment.size = 0,
    box.padding = 0.5,
    size = 3.5,
    fontface = "italic"
  ) +
  scale_fill_manual(values = results_colors3) +
  scale_x_continuous(limits = c(0, 7), expand = c(0, 0), breaks = seq(0, 7, by = 1)) +
  labs(
    title = "Figure 5. Alcohol Consumption Mean by Age Group in Europe",
    subtitle = "Finland, Ireland, Portugal, and Slovenia",
    y = NULL,
    x = "Alcohol Consumption from Everyday (1) to Never (7)",
    caption = "Source: European Social Survey"
  ) + theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.x = element_text(face = "bold"),
    axis.text.y = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
Figure5

# I really wanted to do boxplots like you did in tutorial 8, but since alcohol consumption is not truly continuous it didn't work properly. But I am really happy with how these turned out.
#ggsave(Figure5, 
 #      filename = "Figure5.png",
  #    device = "png")

##Regression

model1 <- lm(alc_consumption ~ income, data = ess_clean_regression)
summary(model1) #model 1 creation
## 
## Call:
## lm(formula = alc_consumption ~ income, data = ess_clean_regression)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8431 -1.5096 -0.0256  1.9744  3.1429 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.84306    0.06907  70.118  < 2e-16 ***
## income11% to 50% -0.33349    0.07810  -4.270 1.98e-05 ***
## income51% to 90% -0.81746    0.08135 -10.049  < 2e-16 ***
## incomeTop 10%    -0.98592    0.13262  -7.434 1.20e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.918 on 5815 degrees of freedom
## Multiple R-squared:  0.0245, Adjusted R-squared:  0.02399 
## F-statistic: 48.67 on 3 and 5815 DF,  p-value: < 2.2e-16
model2 <- lm(alc_consumption ~ income + age, data = ess_clean_regression)
summary(model2) # model 2 creation
## 
## Call:
## lm(formula = alc_consumption ~ income + age, data = ess_clean_regression)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4955 -1.5659 -0.1662  1.7873  3.3483 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.84874    0.09242  52.466  < 2e-16 ***
## income11% to 50% -0.28285    0.07794  -3.629 0.000287 ***
## income51% to 90% -0.70152    0.08290  -8.463  < 2e-16 ***
## incomeTop 10%    -0.85375    0.13358  -6.391 1.77e-10 ***
## age30-39          0.01903    0.09298   0.205 0.837867    
## age40-49         -0.14935    0.09365  -1.595 0.110795    
## age50-59         -0.34333    0.09059  -3.790 0.000152 ***
## age60-69         -0.22564    0.08860  -2.547 0.010904 *  
## age70-79          0.10886    0.09968   1.092 0.274820    
## age80+            0.64678    0.12610   5.129 3.01e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.905 on 5809 degrees of freedom
## Multiple R-squared:  0.03832,    Adjusted R-squared:  0.03683 
## F-statistic: 25.72 on 9 and 5809 DF,  p-value: < 2.2e-16
model3 <- lm(alc_consumption ~ income + age + gender, data = ess_clean_regression)
summary(model3) #model 3 creation
## 
## Call:
## lm(formula = alc_consumption ~ income + age + gender, data = ess_clean_regression)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8150 -1.4686 -0.1355  1.5610  3.7611 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       5.22856    0.09156  57.106  < 2e-16 ***
## income11% to 50% -0.19138    0.07562  -2.531  0.01140 *  
## income51% to 90% -0.55087    0.08064  -6.831 9.29e-12 ***
## incomeTop 10%    -0.65959    0.12974  -5.084 3.81e-07 ***
## age30-39         -0.04693    0.09011  -0.521  0.60254    
## age40-49         -0.22081    0.09076  -2.433  0.01501 *  
## age50-59         -0.37136    0.08774  -4.233 2.35e-05 ***
## age60-69         -0.23871    0.08581  -2.782  0.00542 ** 
## age70-79          0.09830    0.09653   1.018  0.30854    
## age80+            0.58646    0.12216   4.801 1.62e-06 ***
## genderMale       -0.95874    0.04879 -19.652  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.845 on 5808 degrees of freedom
## Multiple R-squared:  0.09828,    Adjusted R-squared:  0.09672 
## F-statistic:  63.3 on 10 and 5808 DF,  p-value: < 2.2e-16
model4 <- lm(alc_consumption ~ income + age + gender + cntry, data = ess_clean_regression)
summary(model4) #model 4 creation, decided to add country into the mix since I did not facet any of the plots. Faceting just looks too busy and hard to follow.
## 
## Call:
## lm(formula = alc_consumption ~ income + age + gender + cntry, 
##     data = ess_clean_regression)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8433 -1.4927 -0.1877  1.5866  3.9281 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       5.35017    0.10055  53.211  < 2e-16 ***
## income11% to 50% -0.18558    0.07577  -2.449  0.01435 *  
## income51% to 90% -0.56811    0.08213  -6.917 5.11e-12 ***
## incomeTop 10%    -0.69474    0.13100  -5.303 1.18e-07 ***
## age30-39         -0.03014    0.09013  -0.334  0.73810    
## age40-49         -0.21013    0.09066  -2.318  0.02050 *  
## age50-59         -0.36867    0.08756  -4.211 2.58e-05 ***
## age60-69         -0.24025    0.08566  -2.805  0.00505 ** 
## age70-79          0.12908    0.09653   1.337  0.18119    
## age80+            0.59908    0.12211   4.906 9.55e-07 ***
## genderMale       -0.96297    0.04869 -19.776  < 2e-16 ***
## cntryIE          -0.10598    0.06173  -1.717  0.08604 .  
## cntryPT          -0.37852    0.07175  -5.275 1.37e-07 ***
## cntrySI          -0.12931    0.07308  -1.769  0.07688 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.841 on 5805 degrees of freedom
## Multiple R-squared:  0.1026, Adjusted R-squared:  0.1006 
## F-statistic: 51.07 on 13 and 5805 DF,  p-value: < 2.2e-16
inter_age_gender <- lm(alc_consumption ~ income + age * gender + cntry, data = ess_clean_regression)
summary(inter_age_gender) # and finally this is the interaction model. I did try to do another one with country and age and it was just a mess with four lines showing across the graph instead of the two for gender. Gender is easier to follow and understand. 
## 
## Call:
## lm(formula = alc_consumption ~ income + age * gender + cntry, 
##     data = ess_clean_regression)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.0420 -1.4913 -0.1718  1.5106  3.9550 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          5.28070    0.11905  44.358  < 2e-16 ***
## income11% to 50%    -0.16220    0.07589  -2.137  0.03262 *  
## income51% to 90%    -0.54179    0.08231  -6.582 5.04e-11 ***
## incomeTop 10%       -0.68011    0.13095  -5.194 2.13e-07 ***
## age30-39            -0.06806    0.12396  -0.549  0.58300    
## age40-49            -0.24763    0.12446  -1.990  0.04667 *  
## age50-59            -0.35749    0.12261  -2.916  0.00356 ** 
## age60-69            -0.17443    0.12037  -1.449  0.14737    
## age70-79             0.37007    0.13451   2.751  0.00595 ** 
## age80+               0.86103    0.16255   5.297 1.22e-07 ***
## genderMale          -0.86777    0.13173  -6.587 4.87e-11 ***
## cntryIE             -0.09976    0.06168  -1.617  0.10584    
## cntryPT             -0.37899    0.07168  -5.287 1.29e-07 ***
## cntrySI             -0.12884    0.07300  -1.765  0.07763 .  
## age30-39:genderMale  0.08887    0.17898   0.497  0.61954    
## age40-49:genderMale  0.09079    0.18044   0.503  0.61488    
## age50-59:genderMale -0.02334    0.17435  -0.134  0.89352    
## age60-69:genderMale -0.13442    0.17108  -0.786  0.43209    
## age70-79:genderMale -0.50091    0.19224  -2.606  0.00919 ** 
## age80+:genderMale   -0.61330    0.24516  -2.502  0.01239 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.839 on 5799 degrees of freedom
## Multiple R-squared:  0.1058, Adjusted R-squared:  0.1028 
## F-statistic:  36.1 on 19 and 5799 DF,  p-value: < 2.2e-16
tab_model(
  model1, model2, model3, model4, inter_age_gender, title = "Table.2 Regression Models Predicting Alcohol Consumption across Four variables", 
  dv.labels = c("Model 1: Income", "Model 2: + Age", "Model 3: + Gender", "Model 4: + Country", "Interaction Model: Age * Gender"), pred.labels = c("Intercept", "Income 11% to 50%", " Income 51% to 90%", "Income Top 10%", "Age 30-39", "Age 40-49", "Age 50-59", "Age 60-69", "Age 70-79", "Age 80+", "Male", "Ireland", "Portugal", "Slovenia", "Age 30-39 x Male", "Age 40-49 x Male", "Age 50-59 x Male", "Age 60-69 x Male", "Age 70-79 x Male", "Age 80+ x Male"
  ),
  string.est = "Estimate",
  string.ci = "95% CI",
  string.p = "p-value",
  p.style = "numeric",
  collapse.ci = TRUE,
  show.aic = TRUE,
  show.r2 = TRUE,
  show.obs = TRUE,
  digits = 2,
  digits.p = 3)
Table.2 Regression Models Predicting Alcohol Consumption across Four variables
  Model 1: Income Model 2: + Age Model 3: + Gender Model 4: + Country Interaction Model: Age * Gender
Predictors Estimate p-value Estimate p-value Estimate p-value Estimate p-value Estimate p-value
Intercept 4.84
(4.71 – 4.98)
<0.001 4.85
(4.67 – 5.03)
<0.001 5.23
(5.05 – 5.41)
<0.001 5.35
(5.15 – 5.55)
<0.001 5.28
(5.05 – 5.51)
<0.001
Income 11% to 50% -0.33
(-0.49 – -0.18)
<0.001 -0.28
(-0.44 – -0.13)
<0.001 -0.19
(-0.34 – -0.04)
0.011 -0.19
(-0.33 – -0.04)
0.014 -0.16
(-0.31 – -0.01)
0.033
Income 51% to 90% -0.82
(-0.98 – -0.66)
<0.001 -0.70
(-0.86 – -0.54)
<0.001 -0.55
(-0.71 – -0.39)
<0.001 -0.57
(-0.73 – -0.41)
<0.001 -0.54
(-0.70 – -0.38)
<0.001
Income Top 10% -0.99
(-1.25 – -0.73)
<0.001 -0.85
(-1.12 – -0.59)
<0.001 -0.66
(-0.91 – -0.41)
<0.001 -0.69
(-0.95 – -0.44)
<0.001 -0.68
(-0.94 – -0.42)
<0.001
Age 30-39 0.02
(-0.16 – 0.20)
0.838 -0.05
(-0.22 – 0.13)
0.603 -0.03
(-0.21 – 0.15)
0.738 -0.07
(-0.31 – 0.17)
0.583
Age 40-49 -0.15
(-0.33 – 0.03)
0.111 -0.22
(-0.40 – -0.04)
0.015 -0.21
(-0.39 – -0.03)
0.021 -0.25
(-0.49 – -0.00)
0.047
Age 50-59 -0.34
(-0.52 – -0.17)
<0.001 -0.37
(-0.54 – -0.20)
<0.001 -0.37
(-0.54 – -0.20)
<0.001 -0.36
(-0.60 – -0.12)
0.004
Age 60-69 -0.23
(-0.40 – -0.05)
0.011 -0.24
(-0.41 – -0.07)
0.005 -0.24
(-0.41 – -0.07)
0.005 -0.17
(-0.41 – 0.06)
0.147
Age 70-79 0.11
(-0.09 – 0.30)
0.275 0.10
(-0.09 – 0.29)
0.309 0.13
(-0.06 – 0.32)
0.181 0.37
(0.11 – 0.63)
0.006
Age 80+ 0.65
(0.40 – 0.89)
<0.001 0.59
(0.35 – 0.83)
<0.001 0.60
(0.36 – 0.84)
<0.001 0.86
(0.54 – 1.18)
<0.001
Male -0.96
(-1.05 – -0.86)
<0.001 -0.96
(-1.06 – -0.87)
<0.001 -0.87
(-1.13 – -0.61)
<0.001
Ireland -0.11
(-0.23 – 0.02)
0.086 -0.10
(-0.22 – 0.02)
0.106
Portugal -0.38
(-0.52 – -0.24)
<0.001 -0.38
(-0.52 – -0.24)
<0.001
Slovenia -0.13
(-0.27 – 0.01)
0.077 -0.13
(-0.27 – 0.01)
0.078
Age 30-39 x Male 0.09
(-0.26 – 0.44)
0.620
Age 40-49 x Male 0.09
(-0.26 – 0.44)
0.615
Age 50-59 x Male -0.02
(-0.37 – 0.32)
0.894
Age 60-69 x Male -0.13
(-0.47 – 0.20)
0.432
Age 70-79 x Male -0.50
(-0.88 – -0.12)
0.009
Age 80+ x Male -0.61
(-1.09 – -0.13)
0.012
Observations 5819 5819 5819 5819 5819
R2 / R2 adjusted 0.024 / 0.024 0.038 / 0.037 0.098 / 0.097 0.103 / 0.101 0.106 / 0.103
AIC 24098.438 24027.397 23654.791 23632.578 23624.263
#here was the table2 for regression models. I had a lot of trouble saving this. I Got it in but I had to use an online converter to change from html to png. 
Figure6 <- plot_model(model3, 
           show.values = TRUE, 
           value.offset = 0.3, colors = c("#a13d61", "#649e92")) +
  labs(
    title = "Figure 6. Predictors of Alcohol Consumption",
    subtitle = "Coefficient estimates with 95% confidence intervals",
    x = "Estimate",
    y = NULL, caption = "Source: European Social Survey"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.text.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold", size = 9),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
Figure6

#we did not use this figure, but I am going to leave it in here incase we use it in the presentation. This is the unstandardized predictor plot. 
#ggsave(Figure6, 
 #      filename = "Figure6.png",
  #    device = "png")
Figure7 <- plot_model(model3, 
           show.values = TRUE, 
           value.offset = 0.3,
           type = "std2", colors = c("#a13d61", "#649e92")) +
  labs(
    title = "Figure 7. Standardized Predictors of Alcohol Consumption",
    subtitle = "Coefficient estimates with 95% confidence intervals",
    x = "Estimate",
    y = NULL, caption = "Source: European Social Survey"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.text.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold", size = 9),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
Figure7

#same with this plot, we did not use it but I will leave it here. This one is standardized.
#ggsave(Figure7, 
 #      filename = "Figure7.png",
  #    device = "png")
Figure8 <- plot_model(model4, 
                      show.values = TRUE,
                      value.offset = 0.3,
                      type = "pred", 
                      terms = "income", 
                      colors = "#129") +
  labs(
    title = "Figure 8. The Effect of Income on Alcohol Consumption",
    x = NULL,
    y = "Alcohol Consumption (1-7)", caption = "Source: European Social Survey"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.y = element_text(face = "bold", size = 9),
    axis.text = element_text(color = "black", size = 10),
    axis.text.x = element_text(face = "bold", vjust = 0.5),
    axis.line = element_line(color = "black", linewidth = 0.7),
    panel.grid.major.y = element_line(color = "grey90", linewidth = 0.3),
    panel.grid.minor = element_blank(),
    plot.margin = margin(20, 20, 20, 20)) 

Figure8

#this is not very interesting since it is really just a different format of figure 3, but I will leave it here.
#ggsave(Figure8, 
   #    filename = "Figure8.png",
  #    device = "png")
Figure9 <- interact_plot(inter_age_gender, 
                                 pred = "age", 
                                 modx = "gender",
                                 interval = TRUE,
                                 int.width = 0.95,
                                 colors = results_colors2, legend.main = "Gender") +
  labs(
    title = "Figure 6. The Effect of Age on Alcohol Consumption by Gender",
    subtitle = "Interaction effects with 95% confidence intervals",
    y = "Alcohol Consumption from Everyday (1) to Never (7)",
    x = NULL,
    caption = "Source: European Social Survey"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.y = element_text(face = "bold", size = 9),
    axis.text.x = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    legend.background = element_rect(fill = "white", color = "grey90"),
    panel.grid.major.y = element_line(color = "grey90", linewidth = 0.3),
    panel.grid.minor = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
## ✖ Detected factor predictor.
## ℹ Plotting with cat_plot() instead.
## ℹ See `?interactions::cat_plot()` for full details on how to specify models
##   with categorical predictors.
## ℹ If you experience errors or unexpected results, try using cat_plot()
##   directly.
Figure9

#this is the interaction model, which turned out really well. I was very excited to see that it was not a straight line and that the interaction worked. It highlights that men drink more on a daily basis, and it allows for discussion in the written report.
#ggsave(Figure9, 
 #     filename = "Figure9.png", 
  #    device = "png")