library("tidyverse")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("gt")
library("gapminder")
library("srvyr")
##
## Attaching package: 'srvyr'
##
## The following object is masked from 'package:stats':
##
## filter
library("srvyrexploR")
library("fst")
library("ggridges")
library("plotly")
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library("DT")
library("gridExtra")
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library("patchwork")
library("reshape2")
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library("finalfit")
library("sjPlot")
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
library("plotly")
library("kableExtra")
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:srvyr':
##
## group_rows
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library("broom")
library("modelsummary")
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
## options(modelsummary_factory_latex = 'kableExtra')
## options(modelsummary_factory_html = 'kableExtra')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
library("ggrepel")
library(geomtextpath)
library("interactions")
library("jtools")
# Load complete ESS data (490,555 observations)
ess <- read_fst("All-ESS-Data.fst")
###Methods and data tables and plots section
#Set up for table
ess_clean <- ess %>%
filter(cntry %in% c("SI", "FI", "IE", "PT")) %>%
mutate(alc_consumption = case_when(
alcfreq %in% c(7) ~ "Never",
alcfreq %in% c(4:6) ~ "Monthly",
alcfreq %in% c(2:3) ~ "Weekly",
alcfreq %in% c(1) ~ "Everyday",
TRUE ~ NA_character_
),
alc_consumption = factor(alc_consumption, levels = c("Never", "Monthly", "Weekly", "Everyday")),
income = case_when(
hinctnta %in% c(1) ~ "Bottom 10%",
hinctnta %in% c(2:5) ~ "11% to 50%",
hinctnta %in% c(6:9) ~ "51% to 90%",
hinctnta %in% c(10) ~ "Top 10%",
TRUE ~ NA_character_
),
income = factor(income, levels = c("Bottom 10%", "11% to 50%", "51% to 90%", "Top 10%")),
age = case_when(
agea %in% c(18:29) ~ "18-29",
agea %in% c(30:39) ~ "30-39",
agea %in% c(40:49) ~ "40-49",
agea %in% c(50:59) ~ "50-59",
agea %in% c(60:69) ~ "60-69",
agea %in% c(70:79) ~ "70-79",
agea >= 80 ~ "80+",
TRUE ~ NA_character_
),
age = factor(age, levels = c("18-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+")),
gender = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
TRUE ~ NA_character_
))
ess_clean <- ess_clean %>% dplyr::select(alc_consumption, income, age, gender, cntry) %>% filter(cntry == "FI" | cntry == "IE" | cntry == "PT" | cntry == "SI", !is.na(alc_consumption), !is.na(income), !is.na(age), !is.na(gender), !is.na(cntry))
#Set up for all non regression things
ess_clean_regression <- ess %>%
filter(cntry %in% c("SI", "FI", "IE", "PT")) %>%
mutate(
alc_consumption = case_when(
alcfreq > 7 ~ NA_real_,
TRUE ~ as.numeric(alcfreq)
),
income = case_when(
hinctnta %in% c(1) ~ "Bottom 10%",
hinctnta %in% c(2:5) ~ "11% to 50%",
hinctnta %in% c(6:9) ~ "51% to 90%",
hinctnta %in% c(10) ~ "Top 10%",
TRUE ~ NA_character_
),
income = factor(income, levels = c("Bottom 10%", "11% to 50%", "51% to 90%", "Top 10%")),
age = case_when(
agea %in% c(18:29) ~ "18-29",
agea %in% c(30:39) ~ "30-39",
agea %in% c(40:49) ~ "40-49",
agea %in% c(50:59) ~ "50-59",
agea %in% c(60:69) ~ "60-69",
agea %in% c(70:79) ~ "70-79",
agea >= 80 ~ "80+",
TRUE ~ NA_character_
),
age = factor(age, levels = c("18-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80+")),
gender = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
TRUE ~ NA_character_
))
ess_clean_regression <- ess_clean_regression %>% dplyr::select(alc_consumption, income, age, gender, cntry) %>% filter(cntry == "FI" | cntry == "IE" | cntry == "PT" | cntry == "SI", !is.na(alc_consumption), !is.na(income), !is.na(age), !is.na(gender), !is.na(cntry))
#set up for regression things
ess_complete <- ess_clean %>%
filter(!is.na(alc_consumption), !is.na(income), !is.na(age), !is.na(gender))
country_counts <- ess %>%
filter(cntry %in% c("SI", "FI", "IE", "PT")) %>%
count(cntry)
transformed_counts <- ess_clean %>%
filter(!is.na(alc_consumption) | !is.na(income) | !is.na(age) | !is.na(gender)) %>%
count(cntry)
complete_counts <- ess_clean %>%
filter(!is.na(alc_consumption), !is.na(income), !is.na(age), !is.na(gender)) %>%
count(cntry)
#code from fixed initial analysis rmarkdown that prof parker provided us for the correct cleaning.
complete_counts
## cntry n
## 1 FI 1913
## 2 IE 1893
## 3 PT 1038
## 4 SI 975
ess_income <- ess_complete %>%
filter(!is.na(income)) %>%
count(income, cntry) %>%
group_by(cntry) %>%
mutate(
prop = n/sum(n),
pct = round(100 * prop, 2)
)
# Age
ess_age <- ess_complete %>%
filter(!is.na(age)) %>%
count(age, cntry) %>%
group_by(cntry) %>%
mutate(
prop = n/sum(n),
pct = round(100 * prop, 2)
)
# Alcohol
ess_alc <- ess_complete %>%
filter(!is.na(alc_consumption)) %>%
count(alc_consumption, cntry) %>%
group_by(alc_consumption) %>%
mutate(
prop = n/sum(n),
pct = round(100 * prop, 2)
)
# Gender
ess_gender <- ess_complete %>%
filter(!is.na(gender)) %>%
count(gender, cntry) %>%
group_by(cntry) %>%
mutate(
prop = n/sum(n),
pct = round(100 * prop, 2)
)
# Income and alcohol consumption
ess_income_alc <- ess_complete %>%
filter(!is.na(income), !is.na(alc_consumption)) %>%
count(income, alc_consumption) %>%
group_by(income) %>%
mutate(
prop = n/sum(n),
pct = round(100 * prop, 2)
)
labeller = ggplot2::labeller(cntry =
c("FI" = "Finland",
"IE" = "Ireland",
"PT" = "Portugal",
"SI" = "Slovenia"))
#set ups for plots that show pct and counts.
ess_gender_sample <- ess_gender %>%
dplyr::select(n, gender) %>%
group_by(gender) %>%
summarise(across(starts_with("n"), ~sum(., na.rm = TRUE)))
## Adding missing grouping variables: `cntry`
pivot_gender <- ess_gender %>%
dplyr::select(gender, cntry, n, pct, prop) %>%
group_by(cntry, gender) %>%
summarise(Percent = sum(prop, na.rm=TRUE),
Respondents = sum(n, na.rm = TRUE)) %>%
pivot_wider(names_from = cntry, values_from = c(Percent, Respondents))
## `summarise()` has grouped output by 'cntry'. You can override using the
## `.groups` argument.
pivot_gender <- cbind(ess_gender_sample, pivot_gender)
colnames(pivot_gender) <- c("Gender", "Sample_Size", "DROP", "FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT", "FI_N", "IE_N", "PT_N", "SI_N")
pivot_gender$DROP = NULL
#here I am finding the gender sample size, using pivot wider so it shows the sample size in numbers and pct, then using cbind to put it together
ess_income_sample <- ess_income %>%
dplyr::select(n, income) %>%
group_by(income) %>%
summarise(across(starts_with("n"), ~sum(., na.rm = TRUE)))
## Adding missing grouping variables: `cntry`
pivot_income <- ess_income %>%
dplyr::select(income, cntry, n, pct, prop) %>%
group_by(cntry, income) %>%
summarise(Percent = sum(prop, na.rm=TRUE),
Respondents = sum(n, na.rm = TRUE)) %>%
pivot_wider(names_from = cntry, values_from = c(Percent, Respondents))
## `summarise()` has grouped output by 'cntry'. You can override using the
## `.groups` argument.
pivot_income <- cbind(ess_income_sample, pivot_income)
colnames(pivot_income) <- c("Income", "Sample_Size", "DROP", "FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT", "FI_N", "IE_N", "PT_N", "SI_N")
pivot_income$DROP = NULL
#here I am finding the income sample size, using pivot wider so it shows the sample size in numbers and pct, then using cbind to put it together
ess_alc_sample <- ess_alc %>%
dplyr::select(n, alc_consumption) %>%
group_by(alc_consumption) %>%
summarise(across(starts_with("n"), ~sum(., na.rm = TRUE)))
pivot_alc <- ess_alc %>%
dplyr::select(alc_consumption, cntry, n, pct, prop) %>%
group_by(cntry, alc_consumption) %>%
summarise(Percent = sum(prop, na.rm=TRUE),
Respondents = sum(n, na.rm = TRUE)) %>%
pivot_wider(names_from = cntry, values_from = c(Percent, Respondents))
## `summarise()` has grouped output by 'cntry'. You can override using the
## `.groups` argument.
pivot_alc <- cbind(ess_alc_sample, pivot_alc)
colnames(pivot_alc) <- c("Alcohol_Consumption", "Sample_Size", "DROP", "FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT", "FI_N", "IE_N", "PT_N", "SI_N")
pivot_alc$DROP = NULL
#here I am finding the alcohol consumption sample size, using pivot wider so it shows the sample size in numbers and pct, then using cbind to put it together
ess_age_sample <- ess_age %>%
dplyr::select(n, age) %>%
group_by(age) %>%
summarise(across(starts_with("n"), ~sum(., na.rm = TRUE)))
## Adding missing grouping variables: `cntry`
pivot_age <- ess_age %>%
dplyr::select(age, cntry, n, pct, prop) %>%
group_by(cntry, age) %>%
summarise(Percent = sum(prop, na.rm=TRUE),
Respondents = sum(n, na.rm = TRUE)) %>%
pivot_wider(names_from = cntry, values_from = c(Percent, Respondents))
## `summarise()` has grouped output by 'cntry'. You can override using the
## `.groups` argument.
pivot_age <- cbind(ess_age_sample, pivot_age)
colnames(pivot_age) <- c("Age", "Sample_Size", "DROP", "FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT", "FI_N", "IE_N", "PT_N", "SI_N")
pivot_age$DROP = NULL
#here I am finding the age sample size, using pivot wider so it shows the sample size in numbers and pct, then using cbind to put it together
binded <- bind_rows(
pivot_alc %>%
mutate(characteristic = "Alcohol Consumption",
category = Alcohol_Consumption) %>%
dplyr::select(-Alcohol_Consumption),
pivot_income %>%
mutate(characteristic = "Income",
category = Income) %>%
dplyr::select(-Income),
pivot_age %>%
mutate(characteristic = "Age",
category = Age) %>%
dplyr::select(-Age),
pivot_gender %>%
mutate(characteristic = "Gender",
category = Gender) %>%
dplyr::select(-Gender))
#since I have all those cbinded tables, now I am using bind_rows to put them all on top of eachother to create one simple statistics table.
#Summary Table
Table1 <- binded %>%
gt(
groupname_col = "characteristic"
) %>%
cols_move_to_start(category) %>%
tab_header(
title = md("**Table 1**"),
subtitle = md("*Source: European Social Survey*")
) %>%
tab_spanner(label = md("**Respondents**"), columns = c("FI_N", "IE_N", "PT_N", "SI_N")) %>%
tab_spanner(label = md("**Percentage**"), columns = c("FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT")) %>%
cols_label(
category = md("**Category**"),
Sample_Size = md("*Sample Size*"),
FI_N = md("*Finland*"),
IE_N = md("*Ireland*"),
PT_N = md("*Portugal*"),
SI_N = md("*Slovenia*"),
FI_PCT = md("*Finland*"),
IE_PCT = md("*Ireland*"),
PT_PCT = md("*Portugal*"),
SI_PCT = md("*Slovenia*")
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_row_groups()
) %>%
fmt_percent(columns = c("FI_PCT", "IE_PCT", "PT_PCT", "SI_PCT")) %>%
tab_options(
table.border.top.width = 4,
table.border.bottom.width = 4,
column_labels.border.bottom.width = 2,
heading.title.font.size = px(18),
heading.subtitle.font.size = px(14),
data_row.padding = px(8)
) %>%
cols_align("center") %>%
tab_style(
style = list(
cell_fill(color = "#f5f5f5")
),
locations = cells_body(rows = seq(1, 17, 2))
) %>%
tab_source_note(source_note = md("*Total Sample Size: 5918*"))
Table1
Table 1 | |||||||||
Source: European Social Survey | |||||||||
Category | Sample Size |
Percentage
|
Respondents
|
||||||
---|---|---|---|---|---|---|---|---|---|
Finland | Ireland | Portugal | Slovenia | Finland | Ireland | Portugal | Slovenia | ||
Alcohol Consumption | |||||||||
Never | 1212 | 19.14% | 40.51% | 24.50% | 15.84% | 232 | 491 | 297 | 192 |
Monthly | 2232 | 44.44% | 25.58% | 12.19% | 17.79% | 992 | 571 | 272 | 397 |
Weekly | 1992 | 33.03% | 39.11% | 12.00% | 15.86% | 658 | 779 | 239 | 316 |
Everyday | 383 | 8.09% | 13.58% | 60.05% | 18.28% | 31 | 52 | 230 | 70 |
Income | |||||||||
Bottom 10% | 771 | 8.63% | 19.60% | 11.75% | 11.59% | 165 | 371 | 122 | 113 |
11% to 50% | 2769 | 38.11% | 53.04% | 51.35% | 51.59% | 729 | 1004 | 533 | 503 |
51% to 90% | 1992 | 45.53% | 24.41% | 33.24% | 32.21% | 871 | 462 | 345 | 314 |
Top 10% | 287 | 7.74% | 2.96% | 3.66% | 4.62% | 148 | 56 | 38 | 45 |
Age | |||||||||
18-29 | 780 | 13.96% | 13.26% | 11.37% | 14.77% | 267 | 251 | 118 | 144 |
30-39 | 932 | 13.75% | 18.75% | 16.18% | 14.97% | 263 | 355 | 168 | 146 |
40-49 | 902 | 14.90% | 16.69% | 14.74% | 15.18% | 285 | 316 | 153 | 148 |
50-59 | 1038 | 19.08% | 17.12% | 15.51% | 19.28% | 365 | 324 | 161 | 188 |
60-69 | 1141 | 22.43% | 18.07% | 18.02% | 18.77% | 429 | 342 | 187 | 183 |
70-79 | 697 | 9.88% | 11.09% | 16.96% | 12.51% | 189 | 210 | 176 | 122 |
80+ | 329 | 6.01% | 5.02% | 7.23% | 4.51% | 115 | 95 | 75 | 44 |
Gender | |||||||||
Female | 3084 | 50.29% | 54.15% | 54.24% | 54.77% | 962 | 1025 | 563 | 534 |
Male | 2735 | 49.71% | 45.85% | 45.76% | 45.23% | 951 | 868 | 475 | 441 |
Total Sample Size: 5918 |
#here I am using that now one complete dataset to create the Table1 summary table. It is in the same order as the regression model.
#Table1 |> gtsave(filename = "Table1.png") #saving table as image so I can insert it into our report instead of screenshoting it.
Figure1 <- ggplot(data = ess_income_alc, mapping = aes(x = income, y = prop, fill = alc_consumption)) +
geom_bar(position = "dodge", stat = "identity",alpha = 0.9, color = "white") +
labs(title = "Figure 1. Relationship between Income and Alcohol Consumption", subtitle = "Finland, Ireland, Portugal, and Slovenia", x = "Income", y = "Percentage", fill = "Alcohol Consumption Levels", caption = "Source: European Social Survey") + theme_minimal() +
scale_fill_viridis_d(option = "E") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_y_continuous(labels = scales::percent_format(), limits = c(0, 0.5), breaks = seq(0, 0.5, by = 0.1)) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(color = "grey50", size = 12),
axis.title.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.line = element_line(color = "black", linewidth = 0.7),
axis.ticks = element_line(color = "black", linewidth = 0.7),
legend.position = "none",
panel.grid = element_blank(),
plot.margin = margin(20, 20, 20, 20)
)
Figure1
#first descriptive plot. I wanted to compare income and alcohol consumption summary stats since it is our main relationship of interest.
#ggsave(Figure1,
# filename = "Figure1.png",
# device = "png")
##Results
ess_1 <- ess_clean_regression %>%
summarize(
median = median(alc_consumption, na.rm = TRUE),
q25 = quantile(alc_consumption, 0.25, na.rm = TRUE),
q75 = quantile(alc_consumption, 0.75, na.rm = TRUE),
iqr = q75 - q25,
mean_alc = mean(alc_consumption, na.rm = TRUE),
sd_alc = sd(alc_consumption, na.rm = TRUE),
n = n(),
se_alc = sd_alc / sqrt(n),
.groups = "drop"
) %>%
mutate_if(is.numeric, ~round(., 2))
#means calculated so i can put them on the figures!
ess_2 <- ess_clean_regression %>%
group_by(income) %>%
summarize(
median = median(alc_consumption, na.rm = TRUE),
q25 = quantile(alc_consumption, 0.25, na.rm = TRUE),
q75 = quantile(alc_consumption, 0.75, na.rm = TRUE),
iqr = q75 - q25,
mean_alc = mean(alc_consumption, na.rm = TRUE),
sd_alc = sd(alc_consumption, na.rm = TRUE),
n = n(),
se_alc = sd_alc / sqrt(n),
.groups = "drop"
) %>%
mutate_if(is.numeric, ~round(., 2))
#means calculated so i can put them on the figures!
ess_3 <- ess_clean_regression %>%
group_by(age) %>%
summarize(
median = median(alc_consumption, na.rm = TRUE),
q25 = quantile(alc_consumption, 0.25, na.rm = TRUE),
q75 = quantile(alc_consumption, 0.75, na.rm = TRUE),
iqr = q75 - q25,
mean_alc = mean(alc_consumption, na.rm = TRUE),
sd_alc = sd(alc_consumption, na.rm = TRUE),
n = n(),
se_alc = sd_alc / sqrt(n),
.groups = "drop"
) %>%
mutate_if(is.numeric, ~round(., 2))
#means calculated so i can put them on the figures!
ess_4 <- ess_clean_regression %>%
group_by(gender) %>%
summarize(
median = median(alc_consumption, na.rm = TRUE),
q25 = quantile(alc_consumption, 0.25, na.rm = TRUE),
q75 = quantile(alc_consumption, 0.75, na.rm = TRUE),
iqr = q75 - q25,
mean_alc = mean(alc_consumption, na.rm = TRUE),
sd_alc = sd(alc_consumption, na.rm = TRUE),
n = n(),
se_alc = sd_alc / sqrt(n),
.groups = "drop"
) %>%
mutate_if(is.numeric, ~round(., 2))
#means calculated so i can put them on the figures!
Figure2 <- ggplot(ess_clean_regression, aes(x = alc_consumption)) +
geom_histogram(fill = "#770737") +
geom_vline(
xintercept = mean(ess_clean_regression$alc_consumption, na.rm = TRUE), linetype = "dashed",
color = "black",
linewidth = 0.7
) +
geom_textvline(label = "Mean: 4.36", xintercept = 4.36, vjust = 1.3, size = 3.5, text_only = TRUE) +
labs(
title = "Figure 2. Distribution of Alcohol Consumption",
subtitle = "Finland, Ireland, Portugal, and Slovenia",
x = "Alcohol Consumption from Everyday (1) to Never (7)",
y = "Repondant Count", caption = "Source: European Social Survey"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(color = "grey50", size = 12),
axis.title.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.line = element_line(color = "black", linewidth = 0.7),
axis.ticks = element_line(color = "black", linewidth = 0.7),
legend.position = "none",
panel.grid = element_blank(),
plot.margin = margin(20, 20, 20, 20)
) +
scale_x_continuous(limits = c(0, 8), breaks = seq(0, 8, by = 1)) +
scale_y_continuous(limits = c(0,1500), breaks = seq(0, 1300, by = 250))
Figure2
## Warning in geom_textvline(label = "Mean: 4.36", xintercept = 4.36, vjust = 1.3, : All aesthetics have length 1, but the data has 5819 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
#the above caluclated means really helped me with this plot and all the following plots. I really wanted to put the number on the plot since it was said in class that we cannot guess/estimate what the mean is, and what if I forget or stumble over my words. Now it is there directly on the plot and the viewers can see it. I also kept the same sort of theme for all the plots because I wanted there to be profesionnalism and unity between them. I didn't want it to look like they belonged in separate presentations.
#ggsave(Figure2,
# filename = "Figure2.png",
# device = "png")
setup1 <- ess_clean_regression %>%
filter(!is.na(alc_consumption), !is.na(income)) %>%
group_by(income) %>%
summarize(
mean_alc = mean(alc_consumption, na.rm = TRUE),
sd_alc = sd(alc_consumption, na.rm = TRUE),
n = n(),
se_alc = sd_alc / sqrt(n),
ci_lower = mean_alc - 1.96 * se_alc,
ci_upper = mean_alc + 1.96 * se_alc,
.groups = "drop"
)
results_colors1 <- c("#647695", "#8A9CBC", "#B0C4E2", "#D6E4FF")
Figure3 <- ggplot(setup1, aes(y = income , x = mean_alc, fill = income)) +
geom_col(alpha = 0.9, width = 0.7, color = "black", linewidth = 0.3) +
geom_errorbarh(
aes(xmin = ci_lower, xmax = ci_upper),
height = 0.2,
color = "black",
linewidth = 0.7
) +
geom_text_repel(
data = setup1,
aes(label = case_when(
income == "Bottom 10%" ~ "Mean: 4.84",
income == "11% to 50%" ~ "Mean: 4.51",
income == "51% to 90%" ~ "Mean: 4.03",
income == "Top 10%" ~ "Mean: 3.86"
)),
nudge_x = 1,
direction = "y",
segment.size = 0,
box.padding = 0.5,
size = 3.5,
fontface = "italic"
) +
scale_fill_manual(values = results_colors1) +
scale_x_continuous(limits = c(0, 7), expand = c(0, 0), breaks = seq(0, 7, by = 1)) +
labs(
title = "Figure 3. Alcohol Consumption Mean by Income in Europe",
subtitle = "Finland, Ireland, Portugal, and Slovenia",
y = NULL,
x = "Alcohol Consumption from Everyday (1) to Never (7)",
caption = "Source: European Social Survey"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(color = "grey50", size = 12),
axis.title.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.line = element_line(color = "black", linewidth = 0.7),
axis.ticks = element_line(color = "black", linewidth = 0.7),
legend.position = "none",
panel.grid = element_blank(),
plot.margin = margin(20, 20, 20, 20)
)
Figure3
#loving how the means look on the plot. Also how there is the confidence interval right on the plot.
#ggsave(Figure3,
# filename = "Figure3.png",
# device = "png")
setup2 <- ess_clean_regression %>%
filter(!is.na(alc_consumption), !is.na(gender)) %>%
group_by(gender) %>%
summarize(
mean_alc = mean(alc_consumption, na.rm = TRUE),
sd_alc = sd(alc_consumption, na.rm = TRUE),
n = n(),
se_alc = sd_alc / sqrt(n),
ci_lower = mean_alc - 1.96 * se_alc,
ci_upper = mean_alc + 1.96 * se_alc,
.groups = "drop"
)
results_colors2 <- c("#6E260E", "#C04000")
Figure4 <- ggplot(setup2, aes(y = gender, x = mean_alc, fill = gender)) +
geom_col(alpha = 0.9, width = 0.7, color = "black", linewidth = 0.3) +
geom_errorbarh(
aes(xmin = ci_lower, xmax = ci_upper), #confidence interval showing
height = 0.2,
color = "black",
linewidth = 0.7
) +
geom_text_repel(
data = setup2,
aes(label = case_when(
gender == "Female" ~ "Mean: 4.83",
gender == "Male" ~ "Mean: 3.82"
)),
nudge_x = 1,
direction = "y",
segment.size = 0,
box.padding = 0.5,
size = 3.5,
fontface = "italic"
) +
scale_fill_manual(values = results_colors2) +
scale_x_continuous(limits = c(0, 7), expand = c(0, 0), breaks = seq(0, 7, by = 1)) +
labs(
title = "Figure 4. Alcohol Consumption Mean by Gender in Europe",
subtitle = "Finland, Ireland, Portugal, and Slovenia",
y = NULL,
x = "Alcohol Consumption from Everyday (1) to Never (7)",
caption = "Source: European Social Survey"
) + theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(color = "grey50", size = 12),
axis.title.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.line = element_line(color = "black", linewidth = 0.7),
axis.ticks = element_line(color = "black", linewidth = 0.7),
legend.position = "none",
panel.grid = element_blank(),
plot.margin = margin(20, 20, 20, 20)
)
Figure4
#this was the biggest shocker plot, with men drinking more than women by a full unit.
#ggsave(Figure4,
# filename = "Figure4.png",
# device = "png")
setup3 <- ess_clean_regression %>%
filter(!is.na(alc_consumption), !is.na(age)) %>%
group_by(age) %>%
summarize(
mean_alc = mean(alc_consumption, na.rm = TRUE),
sd_alc = sd(alc_consumption, na.rm = TRUE),
n = n(),
se_alc = sd_alc / sqrt(n),
ci_lower = mean_alc - 1.96 * se_alc,
ci_upper = mean_alc + 1.96 * se_alc,
.groups = "drop"
)
results_colors3 <- c("#355E3B", "#2E8B57", "#00A36C", "#50C878", "#90EE90", "#98FB98", "#C1E1C1")
Figure5 <- ggplot(setup3, aes(y = age, x = mean_alc, fill = age)) +
geom_col(alpha = 0.9, width = 0.7, color = "black", linewidth = 0.3) +
geom_errorbarh(
aes(xmin = ci_lower, xmax = ci_upper),
height = 0.2,
color = "black",
linewidth = 0.7
) +
geom_text_repel(
data = setup3,
aes(label = case_when(
age == "18-29" ~ "Mean: 4.46",
age == "30-39" ~ "Mean: 4.38",
age == "40-49" ~ "Mean: 4.21",
age == "50-59" ~ "Mean: 4.04",
age == "60-69" ~ "Mean: 4.23",
age == "70-79" ~ "Mean: 4.64",
age == "80+" ~ "Mean: 5.25"
)),
nudge_x = 1,
direction = "y",
segment.size = 0,
box.padding = 0.5,
size = 3.5,
fontface = "italic"
) +
scale_fill_manual(values = results_colors3) +
scale_x_continuous(limits = c(0, 7), expand = c(0, 0), breaks = seq(0, 7, by = 1)) +
labs(
title = "Figure 5. Alcohol Consumption Mean by Age Group in Europe",
subtitle = "Finland, Ireland, Portugal, and Slovenia",
y = NULL,
x = "Alcohol Consumption from Everyday (1) to Never (7)",
caption = "Source: European Social Survey"
) + theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(color = "grey50", size = 12),
axis.title.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.line = element_line(color = "black", linewidth = 0.7),
axis.ticks = element_line(color = "black", linewidth = 0.7),
legend.position = "none",
panel.grid = element_blank(),
plot.margin = margin(20, 20, 20, 20)
)
Figure5
# I really wanted to do boxplots like you did in tutorial 8, but since alcohol consumption is not truly continuous it didn't work properly. But I am really happy with how these turned out.
#ggsave(Figure5,
# filename = "Figure5.png",
# device = "png")
##Regression
model1 <- lm(alc_consumption ~ income, data = ess_clean_regression)
summary(model1) #model 1 creation
##
## Call:
## lm(formula = alc_consumption ~ income, data = ess_clean_regression)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8431 -1.5096 -0.0256 1.9744 3.1429
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.84306 0.06907 70.118 < 2e-16 ***
## income11% to 50% -0.33349 0.07810 -4.270 1.98e-05 ***
## income51% to 90% -0.81746 0.08135 -10.049 < 2e-16 ***
## incomeTop 10% -0.98592 0.13262 -7.434 1.20e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.918 on 5815 degrees of freedom
## Multiple R-squared: 0.0245, Adjusted R-squared: 0.02399
## F-statistic: 48.67 on 3 and 5815 DF, p-value: < 2.2e-16
model2 <- lm(alc_consumption ~ income + age, data = ess_clean_regression)
summary(model2) # model 2 creation
##
## Call:
## lm(formula = alc_consumption ~ income + age, data = ess_clean_regression)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4955 -1.5659 -0.1662 1.7873 3.3483
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.84874 0.09242 52.466 < 2e-16 ***
## income11% to 50% -0.28285 0.07794 -3.629 0.000287 ***
## income51% to 90% -0.70152 0.08290 -8.463 < 2e-16 ***
## incomeTop 10% -0.85375 0.13358 -6.391 1.77e-10 ***
## age30-39 0.01903 0.09298 0.205 0.837867
## age40-49 -0.14935 0.09365 -1.595 0.110795
## age50-59 -0.34333 0.09059 -3.790 0.000152 ***
## age60-69 -0.22564 0.08860 -2.547 0.010904 *
## age70-79 0.10886 0.09968 1.092 0.274820
## age80+ 0.64678 0.12610 5.129 3.01e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.905 on 5809 degrees of freedom
## Multiple R-squared: 0.03832, Adjusted R-squared: 0.03683
## F-statistic: 25.72 on 9 and 5809 DF, p-value: < 2.2e-16
model3 <- lm(alc_consumption ~ income + age + gender, data = ess_clean_regression)
summary(model3) #model 3 creation
##
## Call:
## lm(formula = alc_consumption ~ income + age + gender, data = ess_clean_regression)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8150 -1.4686 -0.1355 1.5610 3.7611
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.22856 0.09156 57.106 < 2e-16 ***
## income11% to 50% -0.19138 0.07562 -2.531 0.01140 *
## income51% to 90% -0.55087 0.08064 -6.831 9.29e-12 ***
## incomeTop 10% -0.65959 0.12974 -5.084 3.81e-07 ***
## age30-39 -0.04693 0.09011 -0.521 0.60254
## age40-49 -0.22081 0.09076 -2.433 0.01501 *
## age50-59 -0.37136 0.08774 -4.233 2.35e-05 ***
## age60-69 -0.23871 0.08581 -2.782 0.00542 **
## age70-79 0.09830 0.09653 1.018 0.30854
## age80+ 0.58646 0.12216 4.801 1.62e-06 ***
## genderMale -0.95874 0.04879 -19.652 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.845 on 5808 degrees of freedom
## Multiple R-squared: 0.09828, Adjusted R-squared: 0.09672
## F-statistic: 63.3 on 10 and 5808 DF, p-value: < 2.2e-16
model4 <- lm(alc_consumption ~ income + age + gender + cntry, data = ess_clean_regression)
summary(model4) #model 4 creation, decided to add country into the mix since I did not facet any of the plots. Faceting just looks too busy and hard to follow.
##
## Call:
## lm(formula = alc_consumption ~ income + age + gender + cntry,
## data = ess_clean_regression)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8433 -1.4927 -0.1877 1.5866 3.9281
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.35017 0.10055 53.211 < 2e-16 ***
## income11% to 50% -0.18558 0.07577 -2.449 0.01435 *
## income51% to 90% -0.56811 0.08213 -6.917 5.11e-12 ***
## incomeTop 10% -0.69474 0.13100 -5.303 1.18e-07 ***
## age30-39 -0.03014 0.09013 -0.334 0.73810
## age40-49 -0.21013 0.09066 -2.318 0.02050 *
## age50-59 -0.36867 0.08756 -4.211 2.58e-05 ***
## age60-69 -0.24025 0.08566 -2.805 0.00505 **
## age70-79 0.12908 0.09653 1.337 0.18119
## age80+ 0.59908 0.12211 4.906 9.55e-07 ***
## genderMale -0.96297 0.04869 -19.776 < 2e-16 ***
## cntryIE -0.10598 0.06173 -1.717 0.08604 .
## cntryPT -0.37852 0.07175 -5.275 1.37e-07 ***
## cntrySI -0.12931 0.07308 -1.769 0.07688 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.841 on 5805 degrees of freedom
## Multiple R-squared: 0.1026, Adjusted R-squared: 0.1006
## F-statistic: 51.07 on 13 and 5805 DF, p-value: < 2.2e-16
inter_age_gender <- lm(alc_consumption ~ income + age * gender + cntry, data = ess_clean_regression)
summary(inter_age_gender) # and finally this is the interaction model. I did try to do another one with country and age and it was just a mess with four lines showing across the graph instead of the two for gender. Gender is easier to follow and understand.
##
## Call:
## lm(formula = alc_consumption ~ income + age * gender + cntry,
## data = ess_clean_regression)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0420 -1.4913 -0.1718 1.5106 3.9550
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.28070 0.11905 44.358 < 2e-16 ***
## income11% to 50% -0.16220 0.07589 -2.137 0.03262 *
## income51% to 90% -0.54179 0.08231 -6.582 5.04e-11 ***
## incomeTop 10% -0.68011 0.13095 -5.194 2.13e-07 ***
## age30-39 -0.06806 0.12396 -0.549 0.58300
## age40-49 -0.24763 0.12446 -1.990 0.04667 *
## age50-59 -0.35749 0.12261 -2.916 0.00356 **
## age60-69 -0.17443 0.12037 -1.449 0.14737
## age70-79 0.37007 0.13451 2.751 0.00595 **
## age80+ 0.86103 0.16255 5.297 1.22e-07 ***
## genderMale -0.86777 0.13173 -6.587 4.87e-11 ***
## cntryIE -0.09976 0.06168 -1.617 0.10584
## cntryPT -0.37899 0.07168 -5.287 1.29e-07 ***
## cntrySI -0.12884 0.07300 -1.765 0.07763 .
## age30-39:genderMale 0.08887 0.17898 0.497 0.61954
## age40-49:genderMale 0.09079 0.18044 0.503 0.61488
## age50-59:genderMale -0.02334 0.17435 -0.134 0.89352
## age60-69:genderMale -0.13442 0.17108 -0.786 0.43209
## age70-79:genderMale -0.50091 0.19224 -2.606 0.00919 **
## age80+:genderMale -0.61330 0.24516 -2.502 0.01239 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.839 on 5799 degrees of freedom
## Multiple R-squared: 0.1058, Adjusted R-squared: 0.1028
## F-statistic: 36.1 on 19 and 5799 DF, p-value: < 2.2e-16
tab_model(
model1, model2, model3, model4, inter_age_gender, title = "Table.2 Regression Models Predicting Alcohol Consumption across Four variables",
dv.labels = c("Model 1: Income", "Model 2: + Age", "Model 3: + Gender", "Model 4: + Country", "Interaction Model: Age * Gender"), pred.labels = c("Intercept", "Income 11% to 50%", " Income 51% to 90%", "Income Top 10%", "Age 30-39", "Age 40-49", "Age 50-59", "Age 60-69", "Age 70-79", "Age 80+", "Male", "Ireland", "Portugal", "Slovenia", "Age 30-39 x Male", "Age 40-49 x Male", "Age 50-59 x Male", "Age 60-69 x Male", "Age 70-79 x Male", "Age 80+ x Male"
),
string.est = "Estimate",
string.ci = "95% CI",
string.p = "p-value",
p.style = "numeric",
collapse.ci = TRUE,
show.aic = TRUE,
show.r2 = TRUE,
show.obs = TRUE,
digits = 2,
digits.p = 3)
 | Model 1: Income | Model 2: + Age | Model 3: + Gender | Model 4: + Country | Interaction Model: Age * Gender | |||||
---|---|---|---|---|---|---|---|---|---|---|
Predictors | Estimate | p-value | Estimate | p-value | Estimate | p-value | Estimate | p-value | Estimate | p-value |
Intercept |
4.84 (4.71 – 4.98) |
<0.001 |
4.85 (4.67 – 5.03) |
<0.001 |
5.23 (5.05 – 5.41) |
<0.001 |
5.35 (5.15 – 5.55) |
<0.001 |
5.28 (5.05 – 5.51) |
<0.001 |
Income 11% to 50% |
-0.33 (-0.49 – -0.18) |
<0.001 |
-0.28 (-0.44 – -0.13) |
<0.001 |
-0.19 (-0.34 – -0.04) |
0.011 |
-0.19 (-0.33 – -0.04) |
0.014 |
-0.16 (-0.31 – -0.01) |
0.033 |
Income 51% to 90% |
-0.82 (-0.98 – -0.66) |
<0.001 |
-0.70 (-0.86 – -0.54) |
<0.001 |
-0.55 (-0.71 – -0.39) |
<0.001 |
-0.57 (-0.73 – -0.41) |
<0.001 |
-0.54 (-0.70 – -0.38) |
<0.001 |
Income Top 10% |
-0.99 (-1.25 – -0.73) |
<0.001 |
-0.85 (-1.12 – -0.59) |
<0.001 |
-0.66 (-0.91 – -0.41) |
<0.001 |
-0.69 (-0.95 – -0.44) |
<0.001 |
-0.68 (-0.94 – -0.42) |
<0.001 |
Age 30-39 |
0.02 (-0.16 – 0.20) |
0.838 |
-0.05 (-0.22 – 0.13) |
0.603 |
-0.03 (-0.21 – 0.15) |
0.738 |
-0.07 (-0.31 – 0.17) |
0.583 | ||
Age 40-49 |
-0.15 (-0.33 – 0.03) |
0.111 |
-0.22 (-0.40 – -0.04) |
0.015 |
-0.21 (-0.39 – -0.03) |
0.021 |
-0.25 (-0.49 – -0.00) |
0.047 | ||
Age 50-59 |
-0.34 (-0.52 – -0.17) |
<0.001 |
-0.37 (-0.54 – -0.20) |
<0.001 |
-0.37 (-0.54 – -0.20) |
<0.001 |
-0.36 (-0.60 – -0.12) |
0.004 | ||
Age 60-69 |
-0.23 (-0.40 – -0.05) |
0.011 |
-0.24 (-0.41 – -0.07) |
0.005 |
-0.24 (-0.41 – -0.07) |
0.005 |
-0.17 (-0.41 – 0.06) |
0.147 | ||
Age 70-79 |
0.11 (-0.09 – 0.30) |
0.275 |
0.10 (-0.09 – 0.29) |
0.309 |
0.13 (-0.06 – 0.32) |
0.181 |
0.37 (0.11 – 0.63) |
0.006 | ||
Age 80+ |
0.65 (0.40 – 0.89) |
<0.001 |
0.59 (0.35 – 0.83) |
<0.001 |
0.60 (0.36 – 0.84) |
<0.001 |
0.86 (0.54 – 1.18) |
<0.001 | ||
Male |
-0.96 (-1.05 – -0.86) |
<0.001 |
-0.96 (-1.06 – -0.87) |
<0.001 |
-0.87 (-1.13 – -0.61) |
<0.001 | ||||
Ireland |
-0.11 (-0.23 – 0.02) |
0.086 |
-0.10 (-0.22 – 0.02) |
0.106 | ||||||
Portugal |
-0.38 (-0.52 – -0.24) |
<0.001 |
-0.38 (-0.52 – -0.24) |
<0.001 | ||||||
Slovenia |
-0.13 (-0.27 – 0.01) |
0.077 |
-0.13 (-0.27 – 0.01) |
0.078 | ||||||
Age 30-39 x Male |
0.09 (-0.26 – 0.44) |
0.620 | ||||||||
Age 40-49 x Male |
0.09 (-0.26 – 0.44) |
0.615 | ||||||||
Age 50-59 x Male |
-0.02 (-0.37 – 0.32) |
0.894 | ||||||||
Age 60-69 x Male |
-0.13 (-0.47 – 0.20) |
0.432 | ||||||||
Age 70-79 x Male |
-0.50 (-0.88 – -0.12) |
0.009 | ||||||||
Age 80+ x Male |
-0.61 (-1.09 – -0.13) |
0.012 | ||||||||
Observations | 5819 | 5819 | 5819 | 5819 | 5819 | |||||
R2 / R2 adjusted | 0.024 / 0.024 | 0.038 / 0.037 | 0.098 / 0.097 | 0.103 / 0.101 | 0.106 / 0.103 | |||||
AIC | 24098.438 | 24027.397 | 23654.791 | 23632.578 | 23624.263 |
#here was the table2 for regression models. I had a lot of trouble saving this. I Got it in but I had to use an online converter to change from html to png.
Figure6 <- plot_model(model3,
show.values = TRUE,
value.offset = 0.3, colors = c("#a13d61", "#649e92")) +
labs(
title = "Figure 6. Predictors of Alcohol Consumption",
subtitle = "Coefficient estimates with 95% confidence intervals",
x = "Estimate",
y = NULL, caption = "Source: European Social Survey"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(color = "grey50", size = 12),
axis.text.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold", size = 9),
axis.text = element_text(color = "black"),
axis.line = element_line(color = "black", linewidth = 0.7),
axis.ticks = element_line(color = "black", linewidth = 0.7),
legend.position = "none",
panel.grid = element_blank(),
plot.margin = margin(20, 20, 20, 20)
)
Figure6
#we did not use this figure, but I am going to leave it in here incase we use it in the presentation. This is the unstandardized predictor plot.
#ggsave(Figure6,
# filename = "Figure6.png",
# device = "png")
Figure7 <- plot_model(model3,
show.values = TRUE,
value.offset = 0.3,
type = "std2", colors = c("#a13d61", "#649e92")) +
labs(
title = "Figure 7. Standardized Predictors of Alcohol Consumption",
subtitle = "Coefficient estimates with 95% confidence intervals",
x = "Estimate",
y = NULL, caption = "Source: European Social Survey"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(color = "grey50", size = 12),
axis.text.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold", size = 9),
axis.text = element_text(color = "black"),
axis.line = element_line(color = "black", linewidth = 0.7),
axis.ticks = element_line(color = "black", linewidth = 0.7),
legend.position = "none",
panel.grid = element_blank(),
plot.margin = margin(20, 20, 20, 20)
)
Figure7
#same with this plot, we did not use it but I will leave it here. This one is standardized.
#ggsave(Figure7,
# filename = "Figure7.png",
# device = "png")
Figure8 <- plot_model(model4,
show.values = TRUE,
value.offset = 0.3,
type = "pred",
terms = "income",
colors = "#129") +
labs(
title = "Figure 8. The Effect of Income on Alcohol Consumption",
x = NULL,
y = "Alcohol Consumption (1-7)", caption = "Source: European Social Survey"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(color = "grey50", size = 12),
axis.title.y = element_text(face = "bold", size = 9),
axis.text = element_text(color = "black", size = 10),
axis.text.x = element_text(face = "bold", vjust = 0.5),
axis.line = element_line(color = "black", linewidth = 0.7),
panel.grid.major.y = element_line(color = "grey90", linewidth = 0.3),
panel.grid.minor = element_blank(),
plot.margin = margin(20, 20, 20, 20))
Figure8
#this is not very interesting since it is really just a different format of figure 3, but I will leave it here.
#ggsave(Figure8,
# filename = "Figure8.png",
# device = "png")
Figure9 <- interact_plot(inter_age_gender,
pred = "age",
modx = "gender",
interval = TRUE,
int.width = 0.95,
colors = results_colors2, legend.main = "Gender") +
labs(
title = "Figure 6. The Effect of Age on Alcohol Consumption by Gender",
subtitle = "Interaction effects with 95% confidence intervals",
y = "Alcohol Consumption from Everyday (1) to Never (7)",
x = NULL,
caption = "Source: European Social Survey"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 12),
plot.subtitle = element_text(color = "grey50", size = 12),
axis.title.y = element_text(face = "bold", size = 9),
axis.text.x = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.line = element_line(color = "black", linewidth = 0.7),
legend.position = "bottom",
legend.title = element_text(face = "bold"),
legend.background = element_rect(fill = "white", color = "grey90"),
panel.grid.major.y = element_line(color = "grey90", linewidth = 0.3),
panel.grid.minor = element_blank(),
plot.margin = margin(20, 20, 20, 20)
)
## ✖ Detected factor predictor.
## ℹ Plotting with cat_plot() instead.
## ℹ See `?interactions::cat_plot()` for full details on how to specify models
## with categorical predictors.
## ℹ If you experience errors or unexpected results, try using cat_plot()
## directly.
Figure9
#this is the interaction model, which turned out really well. I was very excited to see that it was not a straight line and that the interaction worked. It highlights that men drink more on a daily basis, and it allows for discussion in the written report.
#ggsave(Figure9,
# filename = "Figure9.png",
# device = "png")