Written Report

library("tidyverse")

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library("gt")
library("gapminder")
library("srvyr")

## 
## Attaching package: 'srvyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter

library("srvyrexploR")
library("fst")
library("ggridges")
library("plotly")

## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

library("DT")
library("gridExtra")

## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

library("patchwork")
library("reshape2")

## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

library("finalfit")
library("sjPlot")

## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!

library("plotly")
library("kableExtra")

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:srvyr':
## 
##     group_rows
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

library("broom")
library("modelsummary")

## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)

library("ggrepel")
library(geomtextpath)
library("interactions")
library("jtools")

# Load complete ESS data (490,555 observations)
ess <- read_fst("All-ESS-Data.fst")

###Methods and data tables and plots section

#Set up for table

ess_clean <- ess %>%
  filter(cntry %in% c("SI", "FI", "IE", "PT")) %>%
  mutate(alc_consumption = case_when(
    alcfreq %in% c(7) ~ "Never", 
     alcfreq %in% c(4:6) ~ "Monthly",
      alcfreq %in% c(2:3) ~ "Weekly",
      alcfreq %in% c(1) ~ "Everyday",
      TRUE ~ NA_character_
    ),
    alc_consumption = factor(alc_consumption, levels = c("Never", "Monthly", "Weekly", "Everyday")),
    
    income = case_when(
      hinctnta %in% c(1) ~ "Bottom 10%", 
      hinctnta %in% c(2:5) ~ "11% to 50%",
      hinctnta %in% c(6:9) ~ "51% to 90%",
      hinctnta %in% c(10) ~ "Top 10%",
      TRUE ~ NA_character_
    ),
    income = factor(income, levels = c("Bottom 10%", "11% to 50%", "51% to 90%", "Top 10%")),
    
    age = case_when(
      agea %in% c(18:29) ~ "18-29",
      agea %in% c(30:39) ~ "30-39",
      agea %in% c(40:49) ~ "40-49",
      agea %in% c(50:59) ~ "50-59",
      agea %in% c(60:69) ~ "60-69",
      agea %in% c(70:79) ~ "70-79",
      agea >= 80 ~ "80+",
      TRUE ~ NA_character_
    ), 
    age = factor(age, levels = c("18-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+")),
    
    gender = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      TRUE ~ NA_character_
    ))

ess_clean <- ess_clean %>% dplyr::select(alc_consumption, income, age, gender, cntry) %>% filter(cntry == "FI" | cntry == "IE" | cntry == "PT" | cntry == "SI", !is.na(alc_consumption), !is.na(income), !is.na(age), !is.na(gender), !is.na(cntry))
#Set up for all non regression things

ess_clean_regression <- ess %>%
  filter(cntry %in% c("SI", "FI", "IE", "PT")) %>%
  mutate(
    alc_consumption = case_when(
      alcfreq > 7 ~ NA_real_,
      TRUE ~ as.numeric(alcfreq)
    ), 
    
    income = case_when(
      hinctnta %in% c(1) ~ "Bottom 10%", 
      hinctnta %in% c(2:5) ~ "11% to 50%",
      hinctnta %in% c(6:9) ~ "51% to 90%",
      hinctnta %in% c(10) ~ "Top 10%",
      TRUE ~ NA_character_
    ),
    income = factor(income, levels = c("Bottom 10%", "11% to 50%", "51% to 90%", "Top 10%")),
    
    age = case_when(
      agea %in% c(18:29) ~ "18-29",
      agea %in% c(30:39) ~ "30-39",
      agea %in% c(40:49) ~ "40-49",
      agea %in% c(50:59) ~ "50-59",
      agea %in% c(60:69) ~ "60-69",
      agea %in% c(70:79) ~ "70-79",
      agea >= 80 ~ "80+",
      TRUE ~ NA_character_
    ), 
    age = factor(age, levels = c("18-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+")),
    
    gender = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      TRUE ~ NA_character_
    ))

ess_clean_regression <- ess_clean_regression %>% dplyr::select(alc_consumption, income, age, gender, cntry) %>% filter(cntry == "FI" | cntry == "IE" | cntry == "PT" | cntry == "SI", !is.na(alc_consumption), !is.na(income), !is.na(age), !is.na(gender), !is.na(cntry))
#set up for regression things

ess_complete <- ess_clean %>%
  filter(!is.na(alc_consumption), !is.na(income), !is.na(age), !is.na(gender))

country_counts <- ess %>%
  filter(cntry %in% c("SI", "FI", "IE", "PT")) %>%
  count(cntry)

transformed_counts <- ess_clean %>%
  filter(!is.na(alc_consumption) | !is.na(income) | !is.na(age) | !is.na(gender)) %>%
  count(cntry)

complete_counts <- ess_clean %>%
  filter(!is.na(alc_consumption), !is.na(income), !is.na(age), !is.na(gender)) %>%
  count(cntry)
#code from fixed initial analysis rmarkdown that prof parker provided us for the correct cleaning.

complete_counts

##   cntry    n
## 1    FI 1913
## 2    IE 1893
## 3    PT 1038
## 4    SI  975

ess_income <- ess_complete %>%
  filter(!is.na(income)) %>%
  count(income, cntry) %>%
  group_by(cntry) %>%
  mutate(
    prop = n/sum(n),
    pct = round(100 * prop, 2)
  )

# Age 
ess_age <- ess_complete %>%
  filter(!is.na(age)) %>%
  count(age, cntry) %>%
  group_by(cntry) %>%
  mutate(
    prop = n/sum(n),
    pct = round(100 * prop, 2)
  )

# Alcohol
ess_alc <- ess_complete %>%
  filter(!is.na(alc_consumption)) %>%
  count(alc_consumption, cntry) %>%
  group_by(alc_consumption) %>%
  mutate(
    prop = n/sum(n),
    pct = round(100 * prop, 2) 
  )

# Gender
ess_gender <- ess_complete %>%
  filter(!is.na(gender)) %>%
  count(gender, cntry) %>%
  group_by(cntry) %>%
  mutate(
    prop = n/sum(n),
    pct = round(100 * prop, 2)
  )

# Income and alcohol consumption
ess_income_alc <- ess_complete %>%
  filter(!is.na(income), !is.na(alc_consumption)) %>%
  count(income, alc_consumption) %>%
  group_by(income) %>%
  mutate(
    prop = n/sum(n),
    pct = round(100 * prop, 2)
  )

labeller = ggplot2::labeller(cntry = 
    c("FI" = "Finland",
      "IE" = "Ireland",
      "PT" = "Portugal",
      "SI" = "Slovenia"))
#set ups for plots that show pct and counts.

ess_gender_sample <- ess_gender %>%
  dplyr::select(n, gender) %>%
  group_by(gender) %>%
  summarise(across(starts_with("n"), ~sum(., na.rm = TRUE)))

## Adding missing grouping variables: `cntry`

pivot_gender <- ess_gender %>%
  dplyr::select(gender, cntry, n, pct, prop) %>%
  group_by(cntry, gender) %>%
 summarise(Percent = sum(prop, na.rm=TRUE),
            Respondents = sum(n, na.rm = TRUE)) %>% 
  pivot_wider(names_from = cntry, values_from = c(Percent, Respondents))

## `summarise()` has grouped output by 'cntry'. You can override using the
## `.groups` argument.

pivot_gender <- cbind(ess_gender_sample, pivot_gender)

colnames(pivot_gender) <- c("Gender", "Sample_Size", "DROP", "FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT", "FI_N", "IE_N", "PT_N", "SI_N")

pivot_gender$DROP = NULL
#here I am finding the gender sample size, using pivot wider so it shows the sample size in numbers and pct, then using cbind to put it together

ess_income_sample <- ess_income %>%
  dplyr::select(n, income) %>%
  group_by(income) %>%
  summarise(across(starts_with("n"), ~sum(., na.rm = TRUE)))

## Adding missing grouping variables: `cntry`

pivot_income <- ess_income %>%
  dplyr::select(income, cntry, n, pct, prop) %>%
  group_by(cntry, income) %>%
  summarise(Percent = sum(prop, na.rm=TRUE),
            Respondents = sum(n, na.rm = TRUE)) %>% 
  pivot_wider(names_from = cntry, values_from = c(Percent, Respondents))

## `summarise()` has grouped output by 'cntry'. You can override using the
## `.groups` argument.

pivot_income <- cbind(ess_income_sample, pivot_income)

colnames(pivot_income) <- c("Income", "Sample_Size", "DROP", "FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT", "FI_N", "IE_N", "PT_N", "SI_N")

pivot_income$DROP = NULL
#here I am finding the income sample size, using pivot wider so it shows the sample size in numbers and pct, then using cbind to put it together

ess_alc_sample <- ess_alc %>%
  dplyr::select(n, alc_consumption) %>%
  group_by(alc_consumption) %>%
  summarise(across(starts_with("n"), ~sum(., na.rm = TRUE)))

pivot_alc <- ess_alc %>%
  dplyr::select(alc_consumption, cntry, n, pct, prop) %>%
  group_by(cntry, alc_consumption) %>%
  summarise(Percent = sum(prop, na.rm=TRUE),
            Respondents = sum(n, na.rm = TRUE)) %>% 
  pivot_wider(names_from = cntry, values_from = c(Percent, Respondents))

## `summarise()` has grouped output by 'cntry'. You can override using the
## `.groups` argument.

pivot_alc <- cbind(ess_alc_sample, pivot_alc)

colnames(pivot_alc) <- c("Alcohol_Consumption", "Sample_Size", "DROP", "FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT", "FI_N", "IE_N", "PT_N", "SI_N")

pivot_alc$DROP = NULL
#here I am finding the alcohol consumption sample size, using pivot wider so it shows the sample size in numbers and pct, then using cbind to put it together

ess_age_sample <- ess_age %>%
  dplyr::select(n, age) %>%
  group_by(age) %>%
  summarise(across(starts_with("n"), ~sum(., na.rm = TRUE)))

## Adding missing grouping variables: `cntry`

pivot_age <- ess_age %>%
  dplyr::select(age, cntry, n, pct, prop) %>%
  group_by(cntry, age) %>%
  summarise(Percent = sum(prop, na.rm=TRUE),
            Respondents = sum(n, na.rm = TRUE)) %>% 
  pivot_wider(names_from = cntry, values_from = c(Percent, Respondents))

## `summarise()` has grouped output by 'cntry'. You can override using the
## `.groups` argument.

pivot_age <- cbind(ess_age_sample, pivot_age)

colnames(pivot_age) <- c("Age", "Sample_Size", "DROP", "FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT", "FI_N", "IE_N", "PT_N", "SI_N")

pivot_age$DROP = NULL
#here I am finding the age sample size, using pivot wider so it shows the sample size in numbers and pct, then using cbind to put it together

binded <- bind_rows(
  pivot_alc %>%
    mutate(characteristic = "Alcohol Consumption",
           category = Alcohol_Consumption) %>%
    dplyr::select(-Alcohol_Consumption),
  
  pivot_income %>% 
    mutate(characteristic = "Income",
           category = Income) %>%
    dplyr::select(-Income),
  
  pivot_age %>%
    mutate(characteristic = "Age",
           category = Age) %>%
    dplyr::select(-Age),
  
  pivot_gender %>%
    mutate(characteristic = "Gender",
           category = Gender) %>%
    dplyr::select(-Gender))
#since I have all those cbinded tables, now I am using bind_rows to put them all on top of eachother to create one simple statistics table.

#Summary Table

Table1 <- binded %>%
  gt(
    groupname_col = "characteristic"
  ) %>%
  cols_move_to_start(category) %>%
  tab_header(
    title = md("**Table 1**"),
    subtitle = md("*Source: European Social Survey*")
  ) %>%
  tab_spanner(label = md("**Respondents**"), columns = c("FI_N", "IE_N", "PT_N", "SI_N")) %>%
  tab_spanner(label = md("**Percentage**"), columns = c("FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT")) %>%
  cols_label(
    category = md("**Category**"),
    Sample_Size = md("*Sample Size*"),
    FI_N = md("*Finland*"),
    IE_N = md("*Ireland*"),
    PT_N = md("*Portugal*"),
    SI_N = md("*Slovenia*"), 
    FI_PCT = md("*Finland*"),
    IE_PCT = md("*Ireland*"),
    PT_PCT = md("*Portugal*"),
    SI_PCT = md("*Slovenia*")
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_row_groups()
  ) %>%
  fmt_percent(columns = c("FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT")) %>%
   tab_options(
    table.border.top.width = 4, 
    table.border.bottom.width = 4,
    column_labels.border.bottom.width = 2,
    heading.title.font.size = px(18),
    heading.subtitle.font.size = px(14),
    data_row.padding = px(8)
  ) %>%
  cols_align("center") %>%
  tab_style(
    style = list(
      cell_fill(color = "#f5f5f5")
    ),
    locations = cells_body(rows = seq(1, 17, 2))
  ) %>%
  tab_source_note(source_note = md("*Total Sample Size: 5918*"))
Table1

Category	Sample Size	Percentage				Respondents
Table 1
Source: European Social Survey
Category	Sample Size	Finland	Ireland	Portugal	Slovenia	Finland	Ireland	Portugal	Slovenia
Alcohol Consumption
Never	1212	19.14%	40.51%	24.50%	15.84%	232	491	297	192
Monthly	2232	44.44%	25.58%	12.19%	17.79%	992	571	272	397
Weekly	1992	33.03%	39.11%	12.00%	15.86%	658	779	239	316
Everyday	383	8.09%	13.58%	60.05%	18.28%	31	52	230	70
Income
Bottom 10%	771	8.63%	19.60%	11.75%	11.59%	165	371	122	113
11% to 50%	2769	38.11%	53.04%	51.35%	51.59%	729	1004	533	503
51% to 90%	1992	45.53%	24.41%	33.24%	32.21%	871	462	345	314
Top 10%	287	7.74%	2.96%	3.66%	4.62%	148	56	38	45
Age
18-29	780	13.96%	13.26%	11.37%	14.77%	267	251	118	144
30-39	932	13.75%	18.75%	16.18%	14.97%	263	355	168	146
40-49	902	14.90%	16.69%	14.74%	15.18%	285	316	153	148
50-59	1038	19.08%	17.12%	15.51%	19.28%	365	324	161	188
60-69	1141	22.43%	18.07%	18.02%	18.77%	429	342	187	183
70-79	697	9.88%	11.09%	16.96%	12.51%	189	210	176	122
80+	329	6.01%	5.02%	7.23%	4.51%	115	95	75	44
Gender
Female	3084	50.29%	54.15%	54.24%	54.77%	962	1025	563	534
Male	2735	49.71%	45.85%	45.76%	45.23%	951	868	475	441
Total Sample Size: 5918

#here I am using that now one complete dataset to create the Table1 summary table. It is in the same order as the regression model.

#Table1 |> gtsave(filename = "Table1.png") #saving table as image so I can insert it into our report instead of screenshoting it.

Figure1 <- ggplot(data = ess_income_alc, mapping = aes(x = income, y = prop, fill = alc_consumption)) +
    geom_bar(position = "dodge", stat = "identity",alpha = 0.9, color = "white") +
labs(title = "Figure 1. Relationship between Income and Alcohol Consumption", subtitle = "Finland, Ireland, Portugal, and Slovenia", x = "Income", y = "Percentage", fill = "Alcohol Consumption Levels", caption = "Source: European Social Survey") +  theme_minimal() + 
  scale_fill_viridis_d(option = "E") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, 0.5), breaks = seq(0, 0.5, by = 0.1)) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
Figure1

#first descriptive plot. I wanted to compare income and alcohol consumption summary stats since it is our main relationship of interest.

#ggsave(Figure1, 
 #     filename = "Figure1.png",
  #     device = "png")

##Results

ess_1 <- ess_clean_regression  %>%
  summarize(
    median = median(alc_consumption, na.rm = TRUE),
    q25 = quantile(alc_consumption, 0.25, na.rm = TRUE),
    q75 = quantile(alc_consumption, 0.75, na.rm = TRUE),
    iqr = q75 - q25,
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    .groups = "drop"
  ) %>% 
  mutate_if(is.numeric, ~round(., 2))
#means calculated so i can put them on the figures!

ess_2 <- ess_clean_regression %>%
  group_by(income) %>%
  summarize(
    median = median(alc_consumption, na.rm = TRUE),
    q25 = quantile(alc_consumption, 0.25, na.rm = TRUE),
    q75 = quantile(alc_consumption, 0.75, na.rm = TRUE),
    iqr = q75 - q25,
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    .groups = "drop"
  ) %>% 
  mutate_if(is.numeric, ~round(., 2)) 
#means calculated so i can put them on the figures!

ess_3 <- ess_clean_regression %>%
  group_by(age) %>%
  summarize(
    median = median(alc_consumption, na.rm = TRUE),
    q25 = quantile(alc_consumption, 0.25, na.rm = TRUE),
    q75 = quantile(alc_consumption, 0.75, na.rm = TRUE),
    iqr = q75 - q25,
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    .groups = "drop"
  ) %>% 
  mutate_if(is.numeric, ~round(., 2)) 
#means calculated so i can put them on the figures!

ess_4 <- ess_clean_regression %>%
  group_by(gender) %>%
  summarize(
    median = median(alc_consumption, na.rm = TRUE),
    q25 = quantile(alc_consumption, 0.25, na.rm = TRUE),
    q75 = quantile(alc_consumption, 0.75, na.rm = TRUE),
    iqr = q75 - q25,
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    .groups = "drop"
  ) %>% 
  mutate_if(is.numeric, ~round(., 2)) 
#means calculated so i can put them on the figures!

Figure2 <- ggplot(ess_clean_regression, aes(x = alc_consumption)) +
  geom_histogram(fill = "#770737") +
  geom_vline(
    xintercept = mean(ess_clean_regression$alc_consumption, na.rm = TRUE), linetype = "dashed", 
    color = "black", 
    linewidth = 0.7
  ) +
  geom_textvline(label = "Mean: 4.36", xintercept = 4.36, vjust = 1.3, size = 3.5, text_only = TRUE) +
  labs(
    title = "Figure 2. Distribution of Alcohol Consumption",
    subtitle = "Finland, Ireland, Portugal, and Slovenia",
    x = "Alcohol Consumption from Everyday (1) to Never (7)",
    y = "Repondant Count", caption = "Source: European Social Survey"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  ) +
  scale_x_continuous(limits = c(0, 8), breaks = seq(0, 8, by = 1)) +
  scale_y_continuous(limits = c(0,1500), breaks = seq(0, 1300, by = 250))
Figure2

## Warning in geom_textvline(label = "Mean: 4.36", xintercept = 4.36, vjust = 1.3, : All aesthetics have length 1, but the data has 5819 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

#the above caluclated means really helped me with this plot and all the following plots. I really wanted to put the number on the plot since it was said in class that we cannot guess/estimate what the mean is, and what if I forget or stumble over my words. Now it is there directly on the plot and the viewers can see it. I also kept the same sort of theme for all the plots because I wanted there to be profesionnalism and unity between them. I didn't want it to look like they belonged in separate presentations.

#ggsave(Figure2, 
 #      filename = "Figure2.png",
  #  device = "png")

setup1 <- ess_clean_regression %>%
  filter(!is.na(alc_consumption), !is.na(income)) %>%
  group_by(income) %>%
  summarize(
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    ci_lower = mean_alc - 1.96 * se_alc,
    ci_upper = mean_alc + 1.96 * se_alc,
    .groups = "drop"
  )

results_colors1 <- c("#647695", "#8A9CBC", "#B0C4E2", "#D6E4FF")

Figure3 <- ggplot(setup1, aes(y = income , x = mean_alc, fill = income)) +
  geom_col(alpha = 0.9, width = 0.7, color = "black", linewidth = 0.3) +
  geom_errorbarh(
    aes(xmin = ci_lower, xmax = ci_upper),
    height = 0.2,
    color = "black",
    linewidth = 0.7
  ) +  
  geom_text_repel(
    data = setup1,
    aes(label = case_when(
      income == "Bottom 10%" ~ "Mean: 4.84",
      income == "11% to 50%" ~ "Mean: 4.51",
      income == "51% to 90%" ~ "Mean: 4.03",
      income == "Top 10%" ~ "Mean: 3.86"
    )),
    nudge_x = 1,
    direction = "y",
    segment.size = 0,
    box.padding = 0.5,
    size = 3.5,
    fontface = "italic"
  ) +
  scale_fill_manual(values = results_colors1) +
  scale_x_continuous(limits = c(0, 7), expand = c(0, 0), breaks = seq(0, 7, by = 1)) +
  labs(
    title = "Figure 3. Alcohol Consumption Mean by Income in Europe",
    subtitle = "Finland, Ireland, Portugal, and Slovenia",
    y = NULL,
    x = "Alcohol Consumption from Everyday (1) to Never (7)",
    caption = "Source: European Social Survey"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.x = element_text(face = "bold"),
    axis.text.y = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
Figure3

#loving how the means look on the plot. Also how there is the confidence interval right on the plot.

#ggsave(Figure3, 
 #      filename = "Figure3.png",
  #     device = "png")

setup2 <- ess_clean_regression %>%
  filter(!is.na(alc_consumption), !is.na(gender)) %>%
  group_by(gender) %>%
  summarize(
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    ci_lower = mean_alc - 1.96 * se_alc,
    ci_upper = mean_alc + 1.96 * se_alc,
    .groups = "drop"
  )

results_colors2 <- c("#6E260E", "#C04000") 

Figure4 <- ggplot(setup2, aes(y = gender, x = mean_alc, fill = gender)) +
  geom_col(alpha = 0.9, width = 0.7, color = "black", linewidth = 0.3) +
  geom_errorbarh(
    aes(xmin = ci_lower, xmax = ci_upper), #confidence interval showing
    height = 0.2,
    color = "black",
    linewidth = 0.7
  ) +
  geom_text_repel(
    data = setup2,
    aes(label = case_when(
      gender == "Female" ~ "Mean: 4.83",
      gender == "Male" ~ "Mean: 3.82"
    )),
    nudge_x = 1,
    direction = "y",
    segment.size = 0,
    box.padding = 0.5,
    size = 3.5,
    fontface = "italic"
  ) +
  scale_fill_manual(values = results_colors2) +
  scale_x_continuous(limits = c(0, 7), expand = c(0, 0), breaks = seq(0, 7, by = 1)) +
  labs(
    title = "Figure 4. Alcohol Consumption Mean by Gender in Europe",
    subtitle = "Finland, Ireland, Portugal, and Slovenia",
    y = NULL,
    x = "Alcohol Consumption from Everyday (1) to Never (7)",
    caption = "Source: European Social Survey"
  ) + theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.x = element_text(face = "bold"),
    axis.text.y = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
Figure4

#this was the biggest shocker plot, with men drinking more than women by a full unit.

#ggsave(Figure4, 
   #    filename = "Figure4.png",
  #     device = "png")

setup3 <- ess_clean_regression %>%
  filter(!is.na(alc_consumption), !is.na(age)) %>%
  group_by(age) %>%
  summarize(
    mean_alc = mean(alc_consumption, na.rm = TRUE),
    sd_alc = sd(alc_consumption, na.rm = TRUE),
    n = n(),
    se_alc = sd_alc / sqrt(n),
    ci_lower = mean_alc - 1.96 * se_alc,
    ci_upper = mean_alc + 1.96 * se_alc,
    .groups = "drop"
  )

results_colors3 <- c("#355E3B", "#2E8B57", "#00A36C", "#50C878", "#90EE90", "#98FB98", "#C1E1C1")

Figure5 <- ggplot(setup3, aes(y = age, x = mean_alc, fill = age)) +
  geom_col(alpha = 0.9, width = 0.7, color = "black", linewidth = 0.3) +
  geom_errorbarh(
    aes(xmin = ci_lower, xmax = ci_upper),
    height = 0.2,
    color = "black",
    linewidth = 0.7
  ) +
  geom_text_repel(
    data = setup3,
    aes(label = case_when(
      age == "18-29" ~ "Mean: 4.46",
      age == "30-39" ~ "Mean: 4.38",
      age == "40-49" ~ "Mean: 4.21",
      age == "50-59" ~ "Mean: 4.04",
      age == "60-69" ~ "Mean: 4.23",
      age == "70-79" ~ "Mean: 4.64",
      age == "80+" ~ "Mean: 5.25"
    )),
    nudge_x = 1,
    direction = "y",
    segment.size = 0,
    box.padding = 0.5,
    size = 3.5,
    fontface = "italic"
  ) +
  scale_fill_manual(values = results_colors3) +
  scale_x_continuous(limits = c(0, 7), expand = c(0, 0), breaks = seq(0, 7, by = 1)) +
  labs(
    title = "Figure 5. Alcohol Consumption Mean by Age Group in Europe",
    subtitle = "Finland, Ireland, Portugal, and Slovenia",
    y = NULL,
    x = "Alcohol Consumption from Everyday (1) to Never (7)",
    caption = "Source: European Social Survey"
  ) + theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.x = element_text(face = "bold"),
    axis.text.y = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
Figure5

# I really wanted to do boxplots like you did in tutorial 8, but since alcohol consumption is not truly continuous it didn't work properly. But I am really happy with how these turned out.

#ggsave(Figure5, 
 #      filename = "Figure5.png",
  #    device = "png")

##Regression

model1 <- lm(alc_consumption ~ income, data = ess_clean_regression)
summary(model1) #model 1 creation

## 
## Call:
## lm(formula = alc_consumption ~ income, data = ess_clean_regression)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8431 -1.5096 -0.0256  1.9744  3.1429 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.84306    0.06907  70.118  < 2e-16 ***
## income11% to 50% -0.33349    0.07810  -4.270 1.98e-05 ***
## income51% to 90% -0.81746    0.08135 -10.049  < 2e-16 ***
## incomeTop 10%    -0.98592    0.13262  -7.434 1.20e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.918 on 5815 degrees of freedom
## Multiple R-squared:  0.0245, Adjusted R-squared:  0.02399 
## F-statistic: 48.67 on 3 and 5815 DF,  p-value: < 2.2e-16

model2 <- lm(alc_consumption ~ income + age, data = ess_clean_regression)
summary(model2) # model 2 creation

## 
## Call:
## lm(formula = alc_consumption ~ income + age, data = ess_clean_regression)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4955 -1.5659 -0.1662  1.7873  3.3483 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.84874    0.09242  52.466  < 2e-16 ***
## income11% to 50% -0.28285    0.07794  -3.629 0.000287 ***
## income51% to 90% -0.70152    0.08290  -8.463  < 2e-16 ***
## incomeTop 10%    -0.85375    0.13358  -6.391 1.77e-10 ***
## age30-39          0.01903    0.09298   0.205 0.837867    
## age40-49         -0.14935    0.09365  -1.595 0.110795    
## age50-59         -0.34333    0.09059  -3.790 0.000152 ***
## age60-69         -0.22564    0.08860  -2.547 0.010904 *  
## age70-79          0.10886    0.09968   1.092 0.274820    
## age80+            0.64678    0.12610   5.129 3.01e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.905 on 5809 degrees of freedom
## Multiple R-squared:  0.03832,    Adjusted R-squared:  0.03683 
## F-statistic: 25.72 on 9 and 5809 DF,  p-value: < 2.2e-16

model3 <- lm(alc_consumption ~ income + age + gender, data = ess_clean_regression)
summary(model3) #model 3 creation

## 
## Call:
## lm(formula = alc_consumption ~ income + age + gender, data = ess_clean_regression)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8150 -1.4686 -0.1355  1.5610  3.7611 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       5.22856    0.09156  57.106  < 2e-16 ***
## income11% to 50% -0.19138    0.07562  -2.531  0.01140 *  
## income51% to 90% -0.55087    0.08064  -6.831 9.29e-12 ***
## incomeTop 10%    -0.65959    0.12974  -5.084 3.81e-07 ***
## age30-39         -0.04693    0.09011  -0.521  0.60254    
## age40-49         -0.22081    0.09076  -2.433  0.01501 *  
## age50-59         -0.37136    0.08774  -4.233 2.35e-05 ***
## age60-69         -0.23871    0.08581  -2.782  0.00542 ** 
## age70-79          0.09830    0.09653   1.018  0.30854    
## age80+            0.58646    0.12216   4.801 1.62e-06 ***
## genderMale       -0.95874    0.04879 -19.652  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.845 on 5808 degrees of freedom
## Multiple R-squared:  0.09828,    Adjusted R-squared:  0.09672 
## F-statistic:  63.3 on 10 and 5808 DF,  p-value: < 2.2e-16

model4 <- lm(alc_consumption ~ income + age + gender + cntry, data = ess_clean_regression)
summary(model4) #model 4 creation, decided to add country into the mix since I did not facet any of the plots. Faceting just looks too busy and hard to follow.

## 
## Call:
## lm(formula = alc_consumption ~ income + age + gender + cntry, 
##     data = ess_clean_regression)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8433 -1.4927 -0.1877  1.5866  3.9281 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       5.35017    0.10055  53.211  < 2e-16 ***
## income11% to 50% -0.18558    0.07577  -2.449  0.01435 *  
## income51% to 90% -0.56811    0.08213  -6.917 5.11e-12 ***
## incomeTop 10%    -0.69474    0.13100  -5.303 1.18e-07 ***
## age30-39         -0.03014    0.09013  -0.334  0.73810    
## age40-49         -0.21013    0.09066  -2.318  0.02050 *  
## age50-59         -0.36867    0.08756  -4.211 2.58e-05 ***
## age60-69         -0.24025    0.08566  -2.805  0.00505 ** 
## age70-79          0.12908    0.09653   1.337  0.18119    
## age80+            0.59908    0.12211   4.906 9.55e-07 ***
## genderMale       -0.96297    0.04869 -19.776  < 2e-16 ***
## cntryIE          -0.10598    0.06173  -1.717  0.08604 .  
## cntryPT          -0.37852    0.07175  -5.275 1.37e-07 ***
## cntrySI          -0.12931    0.07308  -1.769  0.07688 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.841 on 5805 degrees of freedom
## Multiple R-squared:  0.1026, Adjusted R-squared:  0.1006 
## F-statistic: 51.07 on 13 and 5805 DF,  p-value: < 2.2e-16

inter_age_gender <- lm(alc_consumption ~ income + age * gender + cntry, data = ess_clean_regression)
summary(inter_age_gender) # and finally this is the interaction model. I did try to do another one with country and age and it was just a mess with four lines showing across the graph instead of the two for gender. Gender is easier to follow and understand.

## 
## Call:
## lm(formula = alc_consumption ~ income + age * gender + cntry, 
##     data = ess_clean_regression)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.0420 -1.4913 -0.1718  1.5106  3.9550 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          5.28070    0.11905  44.358  < 2e-16 ***
## income11% to 50%    -0.16220    0.07589  -2.137  0.03262 *  
## income51% to 90%    -0.54179    0.08231  -6.582 5.04e-11 ***
## incomeTop 10%       -0.68011    0.13095  -5.194 2.13e-07 ***
## age30-39            -0.06806    0.12396  -0.549  0.58300    
## age40-49            -0.24763    0.12446  -1.990  0.04667 *  
## age50-59            -0.35749    0.12261  -2.916  0.00356 ** 
## age60-69            -0.17443    0.12037  -1.449  0.14737    
## age70-79             0.37007    0.13451   2.751  0.00595 ** 
## age80+               0.86103    0.16255   5.297 1.22e-07 ***
## genderMale          -0.86777    0.13173  -6.587 4.87e-11 ***
## cntryIE             -0.09976    0.06168  -1.617  0.10584    
## cntryPT             -0.37899    0.07168  -5.287 1.29e-07 ***
## cntrySI             -0.12884    0.07300  -1.765  0.07763 .  
## age30-39:genderMale  0.08887    0.17898   0.497  0.61954    
## age40-49:genderMale  0.09079    0.18044   0.503  0.61488    
## age50-59:genderMale -0.02334    0.17435  -0.134  0.89352    
## age60-69:genderMale -0.13442    0.17108  -0.786  0.43209    
## age70-79:genderMale -0.50091    0.19224  -2.606  0.00919 ** 
## age80+:genderMale   -0.61330    0.24516  -2.502  0.01239 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.839 on 5799 degrees of freedom
## Multiple R-squared:  0.1058, Adjusted R-squared:  0.1028 
## F-statistic:  36.1 on 19 and 5799 DF,  p-value: < 2.2e-16

tab_model(
  model1, model2, model3, model4, inter_age_gender, title = "Table.2 Regression Models Predicting Alcohol Consumption across Four variables", 
  dv.labels = c("Model 1: Income", "Model 2: + Age", "Model 3: + Gender", "Model 4: + Country", "Interaction Model: Age * Gender"), pred.labels = c("Intercept", "Income 11% to 50%", " Income 51% to 90%", "Income Top 10%", "Age 30-39", "Age 40-49", "Age 50-59", "Age 60-69", "Age 70-79", "Age 80+", "Male", "Ireland", "Portugal", "Slovenia", "Age 30-39 x Male", "Age 40-49 x Male", "Age 50-59 x Male", "Age 60-69 x Male", "Age 70-79 x Male", "Age 80+ x Male"
  ),
  string.est = "Estimate",
  string.ci = "95% CI",
  string.p = "p-value",
  p.style = "numeric",
  collapse.ci = TRUE,
  show.aic = TRUE,
  show.r2 = TRUE,
  show.obs = TRUE,
  digits = 2,
  digits.p = 3)

Table.2 Regression Models Predicting Alcohol Consumption across Four variables
	Model 1: Income		Model 2: + Age		Model 3: + Gender		Model 4: + Country		Interaction Model: Age * Gender
Predictors	Estimate	p-value	Estimate	p-value	Estimate	p-value	Estimate	p-value	Estimate	p-value
Intercept	4.84 (4.71 – 4.98)	<0.001	4.85 (4.67 – 5.03)	<0.001	5.23 (5.05 – 5.41)	<0.001	5.35 (5.15 – 5.55)	<0.001	5.28 (5.05 – 5.51)	<0.001
Income 11% to 50%	-0.33 (-0.49 – -0.18)	<0.001	-0.28 (-0.44 – -0.13)	<0.001	-0.19 (-0.34 – -0.04)	0.011	-0.19 (-0.33 – -0.04)	0.014	-0.16 (-0.31 – -0.01)	0.033
Income 51% to 90%	-0.82 (-0.98 – -0.66)	<0.001	-0.70 (-0.86 – -0.54)	<0.001	-0.55 (-0.71 – -0.39)	<0.001	-0.57 (-0.73 – -0.41)	<0.001	-0.54 (-0.70 – -0.38)	<0.001
Income Top 10%	-0.99 (-1.25 – -0.73)	<0.001	-0.85 (-1.12 – -0.59)	<0.001	-0.66 (-0.91 – -0.41)	<0.001	-0.69 (-0.95 – -0.44)	<0.001	-0.68 (-0.94 – -0.42)	<0.001
Age 30-39			0.02 (-0.16 – 0.20)	0.838	-0.05 (-0.22 – 0.13)	0.603	-0.03 (-0.21 – 0.15)	0.738	-0.07 (-0.31 – 0.17)	0.583
Age 40-49			-0.15 (-0.33 – 0.03)	0.111	-0.22 (-0.40 – -0.04)	0.015	-0.21 (-0.39 – -0.03)	0.021	-0.25 (-0.49 – -0.00)	0.047
Age 50-59			-0.34 (-0.52 – -0.17)	<0.001	-0.37 (-0.54 – -0.20)	<0.001	-0.37 (-0.54 – -0.20)	<0.001	-0.36 (-0.60 – -0.12)	0.004
Age 60-69			-0.23 (-0.40 – -0.05)	0.011	-0.24 (-0.41 – -0.07)	0.005	-0.24 (-0.41 – -0.07)	0.005	-0.17 (-0.41 – 0.06)	0.147
Age 70-79			0.11 (-0.09 – 0.30)	0.275	0.10 (-0.09 – 0.29)	0.309	0.13 (-0.06 – 0.32)	0.181	0.37 (0.11 – 0.63)	0.006
Age 80+			0.65 (0.40 – 0.89)	<0.001	0.59 (0.35 – 0.83)	<0.001	0.60 (0.36 – 0.84)	<0.001	0.86 (0.54 – 1.18)	<0.001
Male					-0.96 (-1.05 – -0.86)	<0.001	-0.96 (-1.06 – -0.87)	<0.001	-0.87 (-1.13 – -0.61)	<0.001
Ireland							-0.11 (-0.23 – 0.02)	0.086	-0.10 (-0.22 – 0.02)	0.106
Portugal							-0.38 (-0.52 – -0.24)	<0.001	-0.38 (-0.52 – -0.24)	<0.001
Slovenia							-0.13 (-0.27 – 0.01)	0.077	-0.13 (-0.27 – 0.01)	0.078
Age 30-39 x Male									0.09 (-0.26 – 0.44)	0.620
Age 40-49 x Male									0.09 (-0.26 – 0.44)	0.615
Age 50-59 x Male									-0.02 (-0.37 – 0.32)	0.894
Age 60-69 x Male									-0.13 (-0.47 – 0.20)	0.432
Age 70-79 x Male									-0.50 (-0.88 – -0.12)	0.009
Age 80+ x Male									-0.61 (-1.09 – -0.13)	0.012
Observations	5819		5819		5819		5819		5819
R² / R² adjusted	0.024 / 0.024		0.038 / 0.037		0.098 / 0.097		0.103 / 0.101		0.106 / 0.103
AIC	24098.438		24027.397		23654.791		23632.578		23624.263

#here was the table2 for regression models. I had a lot of trouble saving this. I Got it in but I had to use an online converter to change from html to png.

Figure6 <- plot_model(model3, 
           show.values = TRUE, 
           value.offset = 0.3, colors = c("#a13d61", "#649e92")) +
  labs(
    title = "Figure 6. Predictors of Alcohol Consumption",
    subtitle = "Coefficient estimates with 95% confidence intervals",
    x = "Estimate",
    y = NULL, caption = "Source: European Social Survey"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.text.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold", size = 9),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
Figure6

#we did not use this figure, but I am going to leave it in here incase we use it in the presentation. This is the unstandardized predictor plot.

#ggsave(Figure6, 
 #      filename = "Figure6.png",
  #    device = "png")

Figure7 <- plot_model(model3, 
           show.values = TRUE, 
           value.offset = 0.3,
           type = "std2", colors = c("#a13d61", "#649e92")) +
  labs(
    title = "Figure 7. Standardized Predictors of Alcohol Consumption",
    subtitle = "Coefficient estimates with 95% confidence intervals",
    x = "Estimate",
    y = NULL, caption = "Source: European Social Survey"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.text.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold", size = 9),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    axis.ticks = element_line(color = "black", linewidth = 0.7),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
Figure7

#same with this plot, we did not use it but I will leave it here. This one is standardized.

#ggsave(Figure7, 
 #      filename = "Figure7.png",
  #    device = "png")

Figure8 <- plot_model(model4, 
                      show.values = TRUE,
                      value.offset = 0.3,
                      type = "pred", 
                      terms = "income", 
                      colors = "#129") +
  labs(
    title = "Figure 8. The Effect of Income on Alcohol Consumption",
    x = NULL,
    y = "Alcohol Consumption (1-7)", caption = "Source: European Social Survey"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.y = element_text(face = "bold", size = 9),
    axis.text = element_text(color = "black", size = 10),
    axis.text.x = element_text(face = "bold", vjust = 0.5),
    axis.line = element_line(color = "black", linewidth = 0.7),
    panel.grid.major.y = element_line(color = "grey90", linewidth = 0.3),
    panel.grid.minor = element_blank(),
    plot.margin = margin(20, 20, 20, 20)) 

Figure8

#this is not very interesting since it is really just a different format of figure 3, but I will leave it here.

#ggsave(Figure8, 
   #    filename = "Figure8.png",
  #    device = "png")

Figure9 <- interact_plot(inter_age_gender, 
                                 pred = "age", 
                                 modx = "gender",
                                 interval = TRUE,
                                 int.width = 0.95,
                                 colors = results_colors2, legend.main = "Gender") +
  labs(
    title = "Figure 6. The Effect of Age on Alcohol Consumption by Gender",
    subtitle = "Interaction effects with 95% confidence intervals",
    y = "Alcohol Consumption from Everyday (1) to Never (7)",
    x = NULL,
    caption = "Source: European Social Survey"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(color = "grey50", size = 12),
    axis.title.y = element_text(face = "bold", size = 9),
    axis.text.x = element_text(face = "bold"),
    axis.text = element_text(color = "black"),
    axis.line = element_line(color = "black", linewidth = 0.7),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    legend.background = element_rect(fill = "white", color = "grey90"),
    panel.grid.major.y = element_line(color = "grey90", linewidth = 0.3),
    panel.grid.minor = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )

## ✖ Detected factor predictor.
## ℹ Plotting with cat_plot() instead.
## ℹ See `?interactions::cat_plot()` for full details on how to specify models
##   with categorical predictors.
## ℹ If you experience errors or unexpected results, try using cat_plot()
##   directly.

Figure9

#this is the interaction model, which turned out really well. I was very excited to see that it was not a straight line and that the interaction worked. It highlights that men drink more on a daily basis, and it allows for discussion in the written report.

#ggsave(Figure9, 
 #     filename = "Figure9.png", 
  #    device = "png")

Written Report

2025-03-22