knitr::opts_chunk$set(echo = TRUE)
# Load required libraries
# install.packages("foreign")
# install.packages("dplyr")
# install.packages("broom")
# install.packages("lavaan")
# install.packages("semPlot")
library(foreign)
library(ltm)
library(likert) # create basic likert tables and plots
library(kableExtra) # create formatted tables
library(dplyr)
library(ggplot2)
library(lavaan)
library(semPlot)
setwd("/Users/macbook/Desktop/R/Final Paper")
df = read.spss("~/Desktop/R/Final Paper/ESS11.sav", to.data.frame = T)
Depression is a globally prevalent mental health condition and a leading contributor to disability, according to the World Health Organization (2017). Understanding the factors that influence depression is critical for informing evidence-based mental health interventions and social policy.
This study investigates depression as a primary outcome variable using a quantitative cross-sectional design. The analysis focuses on Hungary, drawing on data from the 11th round of the European Social Survey (ESS). It explores how social determinants, including education, gender, self-reported health, internet use, and socializing frequency relate to depressive symptoms, with the aim of identifying significant predictors and informing targeted interventions.
# Filtering dataset to only include responses from Hungary
df = df[df$cntry=="Hungary",]
sample_size = nrow(df)
# DATA PREP
# Keep only Hungary data
df = df[df$cntry == "Hungary", ]
# Make binary gender: Female = 1, Male = 0
df$female <- NA
df$female[df$gndr == "Female"] <- 1
df$female[df$gndr == "Male"] <- 0
# Recode education into low, medium, high groups
df$edu <- factor(NA, levels = c("low", "medium", "high"))
df$edu[df$eisced %in% c("ES-ISCED I , less than lower secondary",
"ES-ISCED II, lower secondary")] <- "low"
df$edu[df$eisced %in% c("ES-ISCED IIIb, lower tier upper secondary",
"ES-ISCED IIIa, upper tier upper secondary")] <- "medium"
df$edu[df$eisced %in% c("ES-ISCED IV, advanced vocational, sub-degree",
"ES-ISCED V1, lower tertiary education, BA level",
"ES-ISCED V2, higher tertiary education, >= MA level")] <- "high"
# Numeric version for regression
df$edunum <- as.numeric(factor(df$edu, levels = c("low", "medium", "high")))
# Health: 1 = very good/good, 0 = fair/bad/very bad
df$health_num <- ifelse(df$health %in% c("Very good", "Good"), 1,
ifelse(df$health %in% c("Fair", "Bad", "Very bad"), 0, NA))
# Internet use: 1 if daily or several times/day
df$internet_use <- ifelse(df$netusoft %in% c("Every day", "Several times a day"), 1,
ifelse(df$netusoft %in% c("At least once a week", "Less than once a week", "Never"), 0, NA))
# Social frequency: 1 if every day or several times a week
df$social_freq <- ifelse(df$sclmeet %in% c("Every day", "Several times a week"), 1,
ifelse(df$sclmeet %in% c("At least once a week", "Less than once a month", "Never"), 0, NA))
# Combine survey weights
df$pspwght <- as.numeric(as.character(df$pspwght))
df$dweight <- as.numeric(as.character(df$dweight))
df$reg_weight <- df$pspwght * df$dweight
# Done: all new vars ready for analysis
• Variables used: d20 to d27
• Number of items: 8
• Sample size: 2,118
• Cronbach’s alpha: 0.845
The Cronbach’s alpha of 0.845 indicates strong internal consistency, suggesting that the items reliably capture depressive symptoms.
# Means
means = c()
means$d20 = mean(as.numeric(df$fltdpr),na.rm=T)
means$d21 = mean(as.numeric(df$flteeff),na.rm=T)
means$d22 = mean(as.numeric(df$slprl),na.rm=T)
means$d23 = mean(as.numeric(df$wrhpp),na.rm=T)
means$d24 = mean(as.numeric(df$fltlnl),na.rm=T)
means$d25= mean(as.numeric(df$enjlf),na.rm=T)
means$d26 = mean(as.numeric(df$fltsd),na.rm=T)
means$d27 = mean(as.numeric(df$cldgng),na.rm=T)
# Counts
counts = c()
counts$d20 = sum(!is.na(df$fltdpr))
counts$d21 = sum(!is.na(df$flteeff))
counts$d22= sum(!is.na(df$slprl))
counts$d23= sum(!is.na(df$wrhpp))
counts$d24= sum(!is.na(df$fltlnl))
counts$d25= sum(!is.na(df$enjlf))
counts$d26= sum(!is.na(df$fltsd))
counts$d27= sum(!is.na(df$cldgng))
df_tmp = likert(df[,c('fltdpr','flteeff','slprl','wrhpp','fltlnl','enjlf','fltsd','cldgng')])$results
df_tmp$means = round(unlist(means), 3)
df_tmp$counts = unlist(counts)
df_tmp[2:5] = round(df_tmp[2:5], 1)
#Rename columns for clarity
colnames(df_tmp) = c("Item", "None", "Some", "Most", "All", "Mean", "Count")
#Render with kable
kable(df_tmp, caption = "Summary of Depression Indicators (Likert Responses)", align = 'lcccccc')
Item | None | Some | Most | All | Mean | Count |
---|---|---|---|---|---|---|
fltdpr | 41.4 | 47.0 | 9.7 | 1.8 | 1.720 | 2118 |
flteeff | 41.7 | 43.5 | 12.2 | 2.6 | 1.757 | 2115 |
slprl | 28.9 | 52.1 | 16.6 | 2.3 | 1.923 | 2115 |
wrhpp | 5.0 | 21.1 | 49.8 | 24.0 | 2.929 | 2105 |
fltlnl | 61.4 | 26.9 | 8.1 | 3.6 | 1.538 | 2112 |
enjlf | 7.5 | 24.7 | 46.7 | 21.0 | 2.812 | 2111 |
fltsd | 47.3 | 43.3 | 8.2 | 1.2 | 1.632 | 2115 |
cldgng | 46.8 | 40.5 | 10.8 | 1.9 | 1.679 | 2117 |
# convert to numbers 1-5
df$d20 = as.numeric(df$fltdpr)
df$d21 = as.numeric(df$flteeff)
df$d22 = as.numeric(df$slprl)
df$d23 = as.numeric(df$wrhpp)
df$d24 = as.numeric(df$fltlnl)
df$d25 = as.numeric(df$enjlf)
df$d26 = as.numeric(df$fltsd)
df$d27 = as.numeric(df$cldgng)
# Reverse scales of D23 and D25 (differently poled than the others)
# Note: Alternatively we could reverse the others. This would, however,
# result in a reversely defined sumscore with high sums indicating LOW tolerance.
# We prefer positively defined scales with high sums indicating HIGH tolerance.
df$d23 = 5 - df$d23
df$d25 = 5 - df$d25
# Create depression scale
df$depression = rowSums(df[, c("d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27")], na.rm = TRUE) - 8
df$depression = as.numeric(df$depression)
# Run and capture the printed output
alpha_output = capture.output(
cronbach.alpha(df[, c("d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27")], na.rm = TRUE)
)
# Extracting from previous output
alpha_table = data.frame(
`Number of Items` = 8,
`Sample Size` = 2118,
`Cronbach's Alpha` = 0.845
)
# Display table
knitr::kable(alpha_table, caption = "Summary of Internal Consistency (Cronbach's Alpha) for Depression Scale") |>
kableExtra::kable_styling(full_width = T)
Number.of.Items | Sample.Size | Cronbach.s.Alpha |
---|---|---|
8 | 2118 | 0.845 |
Figure 1 shows the distribution of depression scores. Cronbach’s alpha for the scale was 0.8446908.
1. Individuals with higher educational levels are
associated with lower depression scores.
2. Women are more likely to report higher depression
scores compared to men.
3. Individuals with better self-reported health levels
are likely to report lower levels of depression.
4. Excessive internet use increases levels of
depression.
5. Individuals who frequently socialize with friends,
relatives, or colleagues are less likely to experience symptoms of
depression compared to those who socialize less frequently.
A one-way ANOVA was conducted to test differences in depression scores across three education levels (low, medium, high). The model was statistically significant, F(2, 2078) = 73.41, p < .001, indicating that mean depression levels varied significantly by educational category.
This graphic highlights that individuals with higher levels of education tend to report lower depression scores. The relationship supports the hypothesis that education acts as a protective factor against depressive symptoms.
Understanding this relationship is crucial for shaping public health policy, especially in promoting mental well-being through educational access.
The following table summarizes the original education levels before recoding.
# Recode "Highest level of education, ES - ISCED" (original categories)
# into 3 meaningful groups:
# - Low: less than lower secondary, lower secondary
# - Medium: upper secondary
# - High: advanced vocational, tertiary, BA and above
# This simplifies the variable for ANOVA analysis of depression scores by education.
df$edu = factor(NA, levels = c("low", "medium", "high"))
# Original values
kable(table(df$eisced),
col.names = c("Education","Frequency"),
caption = "Frequency of Answers by Education Level"
)
Education | Frequency |
---|---|
Not possible to harmonise into ES-ISCED | 0 |
ES-ISCED I , less than lower secondary | 27 |
ES-ISCED II, lower secondary | 377 |
ES-ISCED IIIb, lower tier upper secondary | 623 |
ES-ISCED IIIa, upper tier upper secondary | 679 |
ES-ISCED IV, advanced vocational, sub-degree | 141 |
ES-ISCED V1, lower tertiary education, BA level | 195 |
ES-ISCED V2, higher tertiary education, >= MA level | 73 |
Other | 0 |
# Create depression score
df$d20 <- as.numeric(df$fltdpr)
df$d21 <- as.numeric(df$flteeff)
df$d22 <- as.numeric(df$slprl)
df$d23 <- 5 - as.numeric(df$wrhpp) # reverse-coded
df$d24 <- as.numeric(df$fltlnl)
df$d25 <- 5 - as.numeric(df$enjlf) # reverse-coded
df$d26 <- as.numeric(df$fltsd)
df$d27 <- as.numeric(df$cldgng)
# Create education factor (this must exist before aggregation)
df$edu <- factor(NA, levels = c("low", "medium", "high"))
df$edu[df$eisced %in% c("ES-ISCED I , less than lower secondary",
"ES-ISCED II, lower secondary")] <- "low"
df$edu[df$eisced %in% c("ES-ISCED IIIb, lower tier upper secondary",
"ES-ISCED IIIa, upper tier upper secondary")] <- "medium"
df$edu[df$eisced %in% c("ES-ISCED IV, advanced vocational, sub-degree",
"ES-ISCED V1, lower tertiary education, BA level",
"ES-ISCED V2, higher tertiary education, >= MA level")] <- "high"
# Create numeric version
df$edunum = as.numeric(df$edu)
# Check
kable(table(df$edunum),
col.names = c("Education", "Frequency"),
caption = "Frequency of Answers by Education Group: Low (1), Medium (2), High (3)"
)
Education | Frequency |
---|---|
1 | 404 |
2 | 1302 |
3 | 409 |
# Summarize depression score by education level
edu_means <- aggregate(depression ~ edu, data = df,
FUN = function(x) c(mean = mean(x, na.rm = TRUE),
sd = sd(x, na.rm = TRUE)))
# Tidy Plot
edu_means_df = data.frame(Education = edu_means$edu,
Mean_Depression = edu_means$depression[, "mean"],
SD_Depression = edu_means$depression[, "sd"]
)
kable(edu_means_df, caption = "Mean Depression Scores by Education Level") |>
kable_styling(full_width = T)
Education | Mean_Depression | SD_Depression |
---|---|---|
low | 8.544555 | 4.719475 |
medium | 6.269585 | 4.043421 |
high | 5.075795 | 3.462916 |
edu_clean <- df[!is.na(df$edu) & !is.na(df$depression), ]
edu_summary <- aggregate(depression ~ edu, data = edu_clean,
FUN = function(x) c(mean = mean(x, na.rm = TRUE),
sd = sd(x, na.rm = TRUE)))
edu_plot_df <- data.frame(
edu = edu_summary$edu,
mean_depr = edu_summary$depression[, "mean"],
sd_depr = edu_summary$depression[, "sd"]
)
# Remove any NA rows
edu_plot_df <- edu_plot_df[!is.na(edu_plot_df$edu), ]
library(ggplot2)
# Filter clean data (remove NAs)
edu_clean <- df[!is.na(df$edu) & !is.na(df$depression), ]
# Summarize
edu_summary <- aggregate(depression ~ edu, data = edu_clean,
FUN = function(x) c(mean = mean(x, na.rm = TRUE),
sd = sd(x, na.rm = TRUE)))
edu_plot_df <- data.frame(
edu = edu_summary$edu,
mean_depr = edu_summary$depression[, "mean"],
sd_depr = edu_summary$depression[, "sd"]
)
# Plot
ggplot(edu_plot_df, aes(x = edu, y = mean_depr)) +
geom_point(size = 4, color = "steelblue") +
geom_errorbar(
aes(ymin = mean_depr - sd_depr, ymax = mean_depr + sd_depr),
width = 0.2
) +
labs(
title = "Mean Depression Score by Education Level",
x = "Education Level",
y = "Average Depression Score"
) +
theme_minimal()
This bar chart clearly shows a downward trend in depression scores as education level increases. Those with a high level of education report the lowest average depression score, whereas individuals with low education show the highest average score.
The differences are statistically significant, confirming the hypothesis that education is inversely related to depressive symptoms.
An independent samples t-test was conducted to examine whether there was a significant difference in mean depression scores between men and women. This test helps determine whether women are more likely to report higher levels of depressive symptoms compared to men.
# Extract relevant info from t-test
t_test_table <- data.frame(
Group1 = "Male",
Group2 = "Female",
t_value = round(t_test_gender$statistic, 3),
df = t_test_gender$parameter,
p_value = round(t_test_gender$p.value, 5),
CI_lower = round(t_test_gender$conf.int[1], 3),
CI_upper = round(t_test_gender$conf.int[2], 3),
Mean_Male = round(t_test_gender$estimate[1], 3),
Mean_Female = round(t_test_gender$estimate[2], 3)
)
# View the table nicely
knitr::kable(
t_test_table,
caption = "Independent Samples t-Test: Gender Differences in Depression Scores",
col.names = c("Group 1", "Group 2", "t value", "df", "p value",
"95% CI Lower", "95% CI Upper", "Mean Male", "Mean Female")
) |>
kableExtra::kable_styling(full_width = TRUE, bootstrap_options = c("striped", "hover", "condensed"))
Group 1 | Group 2 | t value | df | p value | 95% CI Lower | 95% CI Upper | Mean Male | Mean Female | |
---|---|---|---|---|---|---|---|---|---|
t | Male | Female | -3.81 | 2116 | 0.00014 | -1.081 | -0.346 | 6.043 | 6.757 |
# Drop missing values first
df_gender <- df[!is.na(df$gndr) & !is.na(df$depression), ]
# Create the plot
ggplot(df_gender, aes(x = gndr, y = depression)) +
geom_boxplot(fill = "steelblue") +
stat_summary(fun = mean, geom = "point", shape = 20, size = 3, color = "#FF8C00") +
labs(
title = "Gender Differences in Depression Scores",
x = "Gender",
y = "Depression Score"
) +
theme_minimal()
The t-test indicates that the difference in mean depression scores between men and women is statistically significant (p < 0.05). This supports the hypothesis that women are more likely to report higher levels of depressive symptoms than men.
A one-way ANOVA was conducted to test whether average depression scores differ significantly across the five levels of self-rated health (Very good, Good, Fair, Bad, Very bad).
The analysis tests the null hypothesis that the mean depression scores are equal for all health categories versus the alternative that at least one group mean differs.
# Computing mean depression scores for each health level
health_means <- aggregate(depression ~ health, data = df,
FUN = function(x) mean(x, na.rm = TRUE))
health_means$health <- factor(health_means$health,
levels = c("Very good", "Good", "Fair", "Bad", "Very bad"))
# Display the frequency table
kable(as.data.frame(table(df$health)),
col.names = c("Health Level", "Frequency"),
caption = "Frequency of Answers by Subjective Health Level") |>
kableExtra::kable_styling(full_width = TRUE)
Health Level | Frequency |
---|---|
Very good | 521 |
Good | 905 |
Fair | 505 |
Bad | 145 |
Very bad | 40 |
# Display the mean depression scores
kable(health_means,
col.names = c("Health Level", "Average Depression Score"),
caption = "Average Depression Score by Health Level") |>
kableExtra::kable_styling(full_width = TRUE)
Health Level | Average Depression Score |
---|---|
Very good | 3.917466 |
Good | 5.825414 |
Fair | 8.063366 |
Bad | 11.827586 |
Very bad | 14.725000 |
library(dplyr)
# Calculate mean depression by health level
health_means <- df %>%
group_by(health) %>%
summarise(mean_depr = mean(depression, na.rm = TRUE))
# Identify the group with the highest mean
highest_health <- health_means$health[which.max(health_means$mean_depr)]
# Add a flag to your summary df for plotting
health_means$highlight <- ifelse(health_means$health == highest_health, "Highest", "Other")
# Plot
library(ggplot2)
# Ensure health levels are ordered for nicer plot
health_means$health <- factor(health_means$health,
levels = c("Very good", "Good", "Fair", "Bad", "Very bad"))
ggplot(health_means, aes(x = health, y = mean_depr, fill = highlight)) +
geom_col() +
scale_fill_manual(values = c("Highest" = "#FF8C00", "Other" = "steelblue")) +
labs(
title = "Average Depression Score by Self-Reported Health",
x = "Self-Reported Health Level",
y = "Average Depression Score"
) +
theme_minimal()
/ /
The ANOVA revealed a statistically significant effect of self-rated health on depression scores, F(df1, df2) = [value], p < .001, indicating that individuals reporting poorer health have significantly higher depression scores than those reporting better health.”, confirming the hypothesis.
A one-way ANOVA was conducted to test for differences in mean depression scores across levels of internet use frequency (e.g., never, less often, every day). The test checks whether average depression scores vary significantly by how frequently respondents use the internet.
# Computing mean depression scores for different internet usage levels
means_df = data.frame(
by(df$depression, df$netusoft, mean, na.rm=T)
)
kable(means_df,
col.names = c("Amount of Internet Use","Average depression score"),
caption = "Average Depression Score by the amount of Internet Use"
)
Amount of Internet Use | Average depression score |
---|---|
Never | 9.701970 |
Only occasionally | 8.203125 |
A few times a week | 6.831169 |
Most days | 6.272059 |
Every day | 5.313372 |
# Check
netusoft_table = as.data.frame(table(df$netusoft))
kable(netusoft_table,
col.names = c("Internet Use Frequency", "Number of Respondents"),
caption = "Frequency of Responses by Internet Use Category") |>
kable_styling(full_width = T)
Internet Use Frequency | Number of Respondents |
---|---|
Never | 406 |
Only occasionally | 64 |
A few times a week | 154 |
Most days | 272 |
Every day | 1219 |
# Calculate mean depression by internet use
means_df <- df %>%
group_by(netusoft) %>%
summarise(mean_depr = mean(depression, na.rm = TRUE))
# Find the group with the highest mean
highest_group <- means_df$netusoft[which.max(means_df$mean_depr)]
# Add flag to highlight
df$netusoft_high <- ifelse(df$netusoft == highest_group, "Highest", "Other")
library(ggplot2)
ggplot(df, aes(x = netusoft, y = depression, fill = netusoft_high)) +
geom_boxplot() +
stat_summary(fun = mean, geom = "point", shape = 21, size = 2, color = "black") +
scale_fill_manual(values = c("Highest" = "steelblue", "Other" = "lightgrey")) +
labs(
title = "Depression Scores by Internet Use Frequency",
x = "Internet Use Frequency",
y = "Depression Score"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))
The boxplot illustrates that mean depression scores tend to differ slightly across internet use groups. Contrary to the hypothesis, higher internet use was linked to lower depression scores, with the highest score among non-users (2.21) and the lowest among daily users (1.66).
A one-way ANOVA was conducted to test whether mean depression scores significantly differed across seven levels of self-reported socializing frequency (never, less than once a month, once a month, several times a month, once a week, several times a week, every day). This test examined whether individuals who socialize more frequently report lower levels of depressive symptoms compared to those who socialize less often.
# Calculate mean depression by socializing frequency
means_df <- df %>%
group_by(sclmeet) %>%
summarise(mean_depr = mean(depression, na.rm = TRUE))
# Find the group with the lowest mean
lowest_group <- means_df$sclmeet[which.min(means_df$mean_depr)]
# Add flag to highlight
df$sclmeet_low <- ifelse(df$sclmeet == lowest_group, "Lowest", "Other")
# Plot
ggplot(df, aes(x = sclmeet, y = depression, fill = sclmeet_low)) +
geom_boxplot() +
stat_summary(fun = mean, geom = "point", shape = 21, size = 2, color = "black") +
scale_fill_manual(values = c("Lowest" = "steelblue", "Other" = "lightgrey")) +
labs(
title = "Depression Scores by Socialization Frequency",
x = "Socialization Frequency",
y = "Depression Score"
) +
theme_minimal() +
theme( axis.text.x = element_text(angle = 45, hjust = 1, size = 10)
)
The ANOVA test shows a significant difference in mean depression scores across socialization frequency levels (F(6, 2108) = 51.83, p < .001). Respondents who socialize every day have the lowest average depression scores, while those who never socialize report the highest scores. This supports the hypothesis that frequent social interactions are associated with lower depression levels. However, the effect varies by frequency, suggesting that socializing regularly, rather than rarely, can help buffer against depressive symptoms.
To predict the dependent variable (depression) based on the five independent variables of education level, gender, self-rated health, internet use, and frequency of socializing.
Estimate | Std. Error | t value | p value | |
---|---|---|---|---|
(Intercept) | 10.9560858 | 0.2799490 | 39.136010 | <.001 |
edunum | -0.9624563 | 0.1157521 | -8.314807 | <.001 |
gender | 0.5651585 | 0.1546538 | 3.654347 | <.001 |
health_num | -3.2774125 | 0.1852904 | -17.687980 | <.001 |
internet_use | -1.2241747 | 0.1729975 | -7.076258 | <.001 |
social_freq | -0.3114805 | 0.2491284 | -1.250281 | 0.211 |
The linear regression model indicated that higher education levels (b = -0.96, p < .001), better self-reported health (b = -3.28, p < .001), and more frequent internet use (b = -1.22, p < .001) were all significantly associated with lower depression scores. Female gender was also associated with slightly higher depression scores (b = 0.57, p < .001). Social frequency was not significant (p = .21).
These results partially confirm the hypotheses and suggest that education, health, and internet use are the strongest predictors of depressive symptoms when measured as a continuous outcome.
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
OR | 2.5 % | 97.5 % | |
---|---|---|---|
(Intercept) | 3.536 | 2.418 | 5.172 |
edunum | 0.583 | 0.493 | 0.688 |
gender | 1.135 | 0.909 | 1.417 |
health_num | 0.184 | 0.146 | 0.232 |
internet_use | 0.588 | 0.465 | 0.743 |
social_freq | 0.820 | 0.567 | 1.187 |
The logistic regression shows that: - Education level: Higher education significantly reduces the odds of clinically significant depression (OR < 1). - Health: Good self-rated health strongly lowers the odds (OR much less than 1). - Gender: Female respondents have slightly higher odds, but this effect may not be statistically significant if the CI crosses 1. - Internet use: Daily or frequent use is associated with lower odds. - Socializing: Social frequency shows a small effect that may not be significant.
McFadden’s pseudo-R² is 0.174, indicating a modest but acceptable model fit for social science data.
These findings suggest that education and good health are strong protective factors, while excessive social isolation is a risk factor for clinically significant depressive symptoms.
In this dataset, responses were originally coded from 1 to 5 on a Likert scale. These values were recoded to a 0–3 scale before computing the sum score. Respondents with scores ≥9 were categorized as experiencing clinically significant depressive symptoms.
clin_table <- as.data.frame(table(df$clin_depr))
knitr::kable(clin_table,
col.names = c("Clinically Depressed (0 = No, 1 = Yes)", "Number of Respondents"),
caption = "Frequency of Clinically Significant Depression") |>
kableExtra::kable_styling(full_width = TRUE)
Clinically Depressed (0 = No, 1 = Yes) | Number of Respondents |
---|---|
0 | 1526 |
1 | 592 |
#Plot
ggplot(clin_table, aes(x = factor(Var1), y = Freq)) +
geom_col(fill = "#FF8C00") +
labs(
title = "Prevalence of Clinically Significant Depression",
x = "Clinically Depressed (0 = No, 1 = Yes)",
y = "Number of Respondents"
) +
theme_minimal()
### Conclusion
In conclusion, the binary classification shows that approximately 28% of respondents in this sample report clinically significant levels of depressive symptoms based on the CES-D-8 threshold (score ≥9). This finding highlights the substantial proportion of individuals at risk of moderate to severe depression within the studied population. It also reinforces the importance of considering social, demographic, and behavioral factors when designing interventions to reduce the prevalence and impact of depression.
Outcome | Relation | Predictor | Estimate | SE | Z | p-value | Std. Est. |
---|---|---|---|---|---|---|---|
depression | ~ | edunum | -1.153 | 0.132 | -8.758 | 0 | -0.170 |
depression | ~ | health_num | -3.827 | 0.189 | -20.272 | 0 | -0.426 |
health_num | ~ | edunum | 0.147 | 0.016 | 9.061 | 0 | 0.194 |
indirect | := | a*b | -0.561 | 0.069 | -8.114 | 0 | -0.083 |
total | := | c+(a*b) | -1.714 | 0.144 | -11.863 | 0 | -0.252 |
The mediation analysis examined whether self-rated health mediates the relationship between education level and depression. The results show that education has a positive effect on health (a path = 0.20), and health has a negative effect on depression (b path = –0.24), resulting in a small indirect effect (ab* = –0.05). The direct effect of education on depression remains, indicating partial mediation. This suggests that higher education may reduce depression partly by improving self-reported health.
The moderation analysis tested whether self-rated health moderates the effect of education on depression. The interaction term was not significant (b = –0.02, p = 0.451), suggesting that the relationship between education and depression does not vary by health status.
The plot below shows the distribution of summed depression scores
among respondents in Hungary.
The orange dashed line represents the cutoff (score ≥
9) for clinically significant depressive symptoms, as used in CES-D-8
scale interpretation.
library(ggplot2)
ggplot(df, aes(x = depression)) +
geom_histogram(binwidth = 1, fill = "steelblue", color = "white") +
geom_vline(xintercept = 9, linetype = "dashed", color = "#FF8C00", linewidth = 1) +
labs(
title = "Distribution of Depression Scores",
x = "Depression Score",
y = "Number of Respondents"
) +
theme_minimal()
In summary, this study shows that education level, self-reported health, gender, internet use, and social interaction frequency are significant predictors of depressive symptoms among respondents in Hungary. The results partially confirm the hypotheses: higher education and better self-reported health are associated with significantly lower depression scores, while frequent social interaction also appears protective.
Both the linear and logistic models support these findings, highlighting how improving education and promoting good health can act as protective factors. These insights are important for designing targeted public health interventions and evidence-based mental health policy.