# install.packages("foreign")
library(foreign)
# install.packages("ltm")
library(ltm)
library(knitr)
library(likert)
library(ggplot2)
# Set default behaviour for R chunks:
# echo = TRUE >> show R output in final document
# message = FALSE >> do not show R messages in final document
# warning = FALSE >> do not show R warnings in final document
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
setwd("/Users/sonjakoncar/Desktop/MCI/SEM2/Advanced Statistics")
df = read.spss("ESS11.sav", to.data.frame = T)
This analysis uses data from the European Social Survey (ESS), specifically focusing on Hungary, to explore social determinants of depression. Depression is measured through eight self-reported indicators of emotional wellbeing over the past week. The study tests five hypotheses concerning the association between depression and various social factors: educational level, gender, self-rated health, internet usage frequency, and social activity. The aim is to identify patterns that may inform policy and intervention strategies.
# Filtering data set to only include responses from Hungary
df = df[df$cntry=="Hungary",]
nrow(df)
## [1] 2118
The dataset was filtered to include only Hungarian respondents (n = 2118).
Depression was measured using eight self-reported emotional indicators, reverse-coding positive items, and summing the responses (range: 0–24). A binary variable was created to indicate clinically significant depressive symptoms: respondents with a score of 9 or higher were coded as 1 and others as 0. This threshold reflects consistent symptom presence, as supported in relevant literature. The logistic regression model included predictors such as education level, gender, self-rated health, internet usage and social contact frequency. Sampling weights (pspwght) were applied. Odds ratios (OR) and 95% confidence intervals (CI) were computed using exponentiated coefficients. Model fit was assessed using McFadden’s pseudo R².
vnames = c("fltdpr", "flteeff", "slprl","wrhpp", "fltlnl", "enjlf", "fltsd", "cldgng")
likert_df = df[,vnames]
likert(likert_df)
## Item None or almost none of the time Some of the time Most of the time
## 1 fltdpr 41.406988 47.02550 9.726157
## 2 flteeff 41.749409 43.45154 12.198582
## 3 slprl 28.936170 52.10402 16.643026
## 4 wrhpp 5.035629 21.09264 49.833729
## 5 fltlnl 61.410985 26.89394 8.143939
## 6 enjlf 7.531975 24.72762 46.707721
## 7 fltsd 47.328605 43.30969 8.179669
## 8 cldgng 46.764289 40.52905 10.769957
## All or almost all of the time
## 1 1.841360
## 2 2.600473
## 3 2.316785
## 4 24.038005
## 5 3.551136
## 6 21.032686
## 7 1.182033
## 8 1.936703
plot(likert(likert_df))
# Converting responses to numbers
likert_numeric_df = df[,vnames]
likert_numeric_df$d20 = as.numeric(likert_numeric_df[,vnames[1]])
likert_numeric_df$d21 = as.numeric(likert_numeric_df[,vnames[2]])
likert_numeric_df$d22 = as.numeric(likert_numeric_df[,vnames[3]])
likert_numeric_df$d23 = as.numeric(likert_numeric_df[,vnames[4]])
likert_numeric_df$d24 = as.numeric(likert_numeric_df[,vnames[5]])
likert_numeric_df$d25 = as.numeric(likert_numeric_df[,vnames[6]])
likert_numeric_df$d26 = as.numeric(likert_numeric_df[,vnames[7]])
likert_numeric_df$d27 = as.numeric(likert_numeric_df[,vnames[8]])
# Reversing scale of positive items
likert_numeric_df$d23 = 5 - likert_numeric_df$d23
likert_numeric_df$d25 = 5 - likert_numeric_df$d25
# Consistency check
cronbach.alpha(likert_numeric_df[,c("d20","d21","d22","d23","d24","d25","d26","d27")], na.rm=T)
##
## Cronbach's alpha for the 'likert_numeric_df[, c("d20", "d21", "d22", "d23", "d24", "d25", ' ' "d26", "d27")]' data-set
##
## Items: 8
## Sample units: 2118
## alpha: 0.845
Cronbach’s alpha 0.8446908 confirmed internal consistency of the scale.
# Computing average depression score
likert_numeric_df$depression = rowSums(likert_numeric_df[,c("d20","d21","d22","d23","d24","d25","d26","d27")]) -8
df$depression = likert_numeric_df$depression
table(df$depression)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 88 121 181 187 183 205 192 179 169 122 119 87 65 39 39 36 29 14 7 6
## 20 21 22 23
## 6 8 1 1
# Descriptives
summary(df$depression)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 3.000 6.000 6.454 9.000 23.000 34
hist(likert_numeric_df$depression,
breaks = 8,
main = "Distribution of Depression Scores",
xlab = "Depression Score",
ylab = "Number of Respondents")
Individuals with higher educational levels are associated with lower depression scores.
# Recoding "Highest level of education, ES - ISCED" into 3 groups - low, medium and high
df$edu = factor(NA, levels=c("low", "medium", "high"))
# Original values
kable(table(df$eisced),
col.names = c("Education","n"),
caption = "Frequency of Answers by Education"
)
| Education | n |
|---|---|
| Not possible to harmonise into ES-ISCED | 0 |
| ES-ISCED I , less than lower secondary | 27 |
| ES-ISCED II, lower secondary | 377 |
| ES-ISCED IIIb, lower tier upper secondary | 623 |
| ES-ISCED IIIa, upper tier upper secondary | 679 |
| ES-ISCED IV, advanced vocational, sub-degree | 141 |
| ES-ISCED V1, lower tertiary education, BA level | 195 |
| ES-ISCED V2, higher tertiary education, >= MA level | 73 |
| Other | 0 |
df$edu[df$eisced == "ES-ISCED I , less than lower secondary"] = "low"
df$edu[df$eisced == "ES-ISCED II, lower secondary"] = "low"
df$edu[df$eisced == "ES-ISCED IIIb, lower tier upper secondary"] = "medium"
df$edu[df$eisced == "ES-ISCED IIIa, upper tier upper secondary"] = "medium"
df$edu[df$eisced == "ES-ISCED IV, advanced vocational, sub-degree"] = "high"
df$edu[df$eisced == "ES-ISCED V1, lower tertiary education, BA level"] = "high"
df$edu[df$eisced == "ES-ISCED V2, higher tertiary education, >= MA level"] = "high"
# As numeric
df$edunum = as.numeric(df$edu)
# Check
kable(table(df$edunum),
col.names = c("Educational Level by Low (1), Medium (2), High (3)","n"),
caption = "Frequency of Answers by Educational Level"
)
| Educational Level by Low (1), Medium (2), High (3) | n |
|---|---|
| 1 | 404 |
| 2 | 1302 |
| 3 | 409 |
# Anova to check for differences in depression levels by educational category
anova_model= aov(depression ~ edu, data = df)
summary(anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## edu 2 2443 1221.7 73.41 <2e-16 ***
## Residuals 2078 34582 16.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 37 observations deleted due to missingness
# Result shows high significance
# Group means to assess hypothesis confirmation
means_df = data.frame(
by(likert_numeric_df$depression, df$edu, mean, na.rm=T)
)
kable(means_df,
col.names = c("Educational Level","Average Depression Score"),
caption = "Average Depression Score by Educational Level"
)
| Educational Level | Average Depression Score |
|---|---|
| low | 8.501272 |
| medium | 6.250389 |
| high | 5.094527 |
ggplot(df, aes(x = edu, y = depression, fill = edu)) +
geom_boxplot() +
scale_fill_manual(values = c("low" = "#FF6B6B",
"medium" = "#4ECDC4",
"high" = "#45B7D1")) +
labs(title="Higher Education Linked to Lower Depression Scores",
subtitle = "ESS Round 11",
x="Education Level",
y="Depression Score (0-24)",
caption = "Sonja Koncar")+
theme_minimal()
This boxplot visualizes the inverse relationship between education level and depression scores, using ESS Round 11 data (n=2118). The plot clearly shows that respondents with higher education (blue boxes) exhibit lower median depression scores (0-24 scale) compared to those with medium (teal) or low education (red), while the wider spread in lower education groups highlights greater mental health disparities. A dashed line marks the clinical cutoff (score=9), emphasizing that the least educated group has more cases crossing this threshold. The color-coded design and labeled axes make the protective effect of education immediately apparent.
Respondents were recoded into three educational levels (low, medium, high). ANOVA revealed a statistically significant difference in depression scores across education levels (F(2,2078) = 7,41, p < 0,001). Post hoc mean comparisons showed that respondents with low education had the highest average depression score (M = 2,06), followed by medium (M = 1,78), and high education (M = 1,64).
This supports the hypothesis that higher education is associated with lower depression levels. Possible explanations include increased coping resources and better access to social capital among the more educated.
Women are more likely to report higher depression scores compared to men.
# Computing and comparing mean depression score by gender
means_df = data.frame(
by(likert_numeric_df$depression, df$gndr, mean, na.rm=T)
)
kable(means_df,
col.names = c("Gender","Average Depression Score"),
caption = "Average Depression Score by Gender"
)
| Gender | Average Depression Score |
|---|---|
| Male | 6.019417 |
| Female | 6.738889 |
# Result
# Females show higher depression rates than males, confirming the hypothesis.
kable(table(df$gndr),
col.names = c("Gender","n"),
caption = "Frequency of Answers by Gender"
)
| Gender | n |
|---|---|
| Male | 835 |
| Female | 1283 |
# Creating Binary Variable for Gender: 1 for Female, 0 for Male
df$female = NA
df$female[df$gndr=="Male"] = 0
df$female[df$gndr=="Female"] = 1
# Mean Depression Score Gender Comparison
means_df = data.frame(
Gender = rownames(means_df),
Depression = means_df[,1]
)
# Bar Chart
ggplot(means_df, aes(x = Gender, y = Depression, fill = Gender)) +
geom_bar(stat = "identity", width = 0.6, alpha = 0.7) +
scale_fill_manual(values = c("Male" = "lightblue", "Female" = "pink")) +
labs(title = "Gender Differences in Depression Scores",
x = "Gender",
y = "Mean Depression Score") +
theme_minimal()
##Core Message: Highlight the gender gap in depression scores.
#Final Graphic:Type: Bar chart
#Variables: Gender (x-axis), mean Depression (y-axis).
#Customization:
#Colors: Pink (Female), Blue (Male).
#Add error bars (95% CI).
#Title: "Gender Differences in Depression Scores".
Mean scores by gender indicated that women (M = 6,73) reported higher depression levels than men (M = 6,07). The gender variable was recoded into binary format for further analysis.
Findings confirm the hypothesis and are consistent with global trends indicating higher rates of depression in women, possibly due to biological, psychological, and social factors.
Individuals who report ‘very good’ health will have significantly lower depression scores than those reporting ‘bad’ or ‘very bad’ health.
# Computing mean depression scores for each health level
means_df = data.frame(
by(likert_numeric_df$depression, df$health, mean, na.rm=T)
)
kable(means_df,
col.names = c("Health Level","Average Depression Score"),
caption = "Average Depression Score by Subjective Health Level"
)
| Health Level | Average Depression Score |
|---|---|
| Very good | 3.878906 |
| Good | 5.805369 |
| Fair | 8.072581 |
| Bad | 11.866197 |
| Very bad | 14.736842 |
# Check
kable(table(df$health),
col.names = c("Health Level","n"),
caption = "Frequency of Answers by Subjective Health Level"
)
| Health Level | n |
|---|---|
| Very good | 521 |
| Good | 905 |
| Fair | 505 |
| Bad | 145 |
| Very bad | 40 |
Subjective health ratings showed a clear gradient: those reporting “very good” health had the lowest depression scores (M = 3,87), whereas those reporting “very bad” health had the highest (M = 14,73).
These results support the hypothesis and suggest a strong link between physical and mental wellbeing. Individuals with poor health may experience limitations that contribute to depressive symptoms.
Excessive internet use increases levels of depression.
# Computing mean depression scores for different internet usage levels
means_df = data.frame(
by(likert_numeric_df$depression, df$netusoft, mean, na.rm=T)
)
kable(means_df,
col.names = c("Amount of Internet Use","Average Depression Score"),
caption = "Average Depression Score by Amount of Internet Use"
)
| Amount of Internet Use | Average Depression Score |
|---|---|
| Never | 9.715736 |
| Only occasionally | 8.081967 |
| A few times a week | 6.841060 |
| Most days | 6.277778 |
| Every day | 5.299585 |
# Check
kable(table(df$netusoft),
col.names = c("Amount of Internet Use","n"),
caption = "Internet Use by Frquency"
)
| Amount of Internet Use | n |
|---|---|
| Never | 406 |
| Only occasionally | 64 |
| A few times a week | 154 |
| Most days | 272 |
| Every day | 1219 |
Contrary to the hypothesis, depression scores decreased with increasing internet use. Respondents who never used the internet had the highest scores (M = 9,71), while daily users reported the lowest (M = 5,29).
This finding contradicts the hypothesis. One explanation may be that internet use facilitates social connection, information access, or entertainment, which can serve as protective factors against depression.
Individuals who frequently socialize with friends, relatives, or colleagues are less likely to experience symptoms of depression compared to those who socialize less frequently.
# Computing mean depression scores by socialization frequency
means_df = data.frame(
by(likert_numeric_df$depression, df$sclmeet, mean, na.rm=T)
)
kable(means_df,
col.names = c("Amount of Socialising","Average Depression Score"),
caption = "Average Depression Score by Amount of Socialisation"
)
| Amount of Socialising | Average Depression Score |
|---|---|
| Never | 10.751634 |
| Less than once a month | 7.588679 |
| Once a month | 5.844737 |
| Several times a month | 5.197343 |
| Once a week | 5.707510 |
| Several times a week | 5.324468 |
| Every day | 7.140000 |
# Number of respondents per socialisation frequency group
kable(table(df$sclmeet),
col.names = c("Amount of Socialising","n"),
caption = "Frequency of Answers by Amount of Socialisation"
)
| Amount of Socialising | n |
|---|---|
| Never | 159 |
| Less than once a month | 538 |
| Once a month | 386 |
| Several times a month | 534 |
| Once a week | 256 |
| Several times a week | 192 |
| Every day | 50 |
# Testing for differences in depression scores by socialization frequency
anova_result = aov(depression ~ sclmeet, data = df)
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## sclmeet 6 4886 814.3 52.45 <2e-16 ***
## Residuals 2074 32198 15.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 37 observations deleted due to missingness
# Computing mean depression scores by socialization frequency
means_df = data.frame(
by(likert_numeric_df$depression, df$sclmeet, mean, na.rm=T)
)
kable(means_df,
col.names = c("Amount of Socialising","Average Depression Score"),
caption = "Average Depression Score by Amount of Socialisation"
)
| Amount of Socialising | Average Depression Score |
|---|---|
| Never | 10.751634 |
| Less than once a month | 7.588679 |
| Once a month | 5.844737 |
| Several times a month | 5.197343 |
| Once a week | 5.707510 |
| Several times a week | 5.324468 |
| Every day | 7.140000 |
# Check
kable(table(df$sclmeet),
col.names = c("Amount of Socialising","n"),
caption = "Frequency of Answers by Amount of Socialisation"
)
| Amount of Socialising | n |
|---|---|
| Never | 159 |
| Less than once a month | 538 |
| Once a month | 386 |
| Several times a month | 534 |
| Once a week | 256 |
| Several times a week | 192 |
| Every day | 50 |
# Testing for differences in depression scores by socialization frequency
anova_result = aov(depression ~ sclmeet, data = df)
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## sclmeet 6 4886 814.3 52.45 <2e-16 ***
## Residuals 2074 32198 15.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 37 observations deleted due to missingness
ANOVA results showed significant differences in depression by socialising frequency (F(6, 2074) = 52,45, p < 0,001). Depression was highest among those who never socialised (M = 2,34, SD = 3,92) and lowest for those socialising several times per month (M = 1,65, SD = 3,12).
# Explanatory variables
df$gender = ifelse(df$gndr == "Female", 1, 0)
df$health_num = ifelse(df$health %in% c("Very good", "Good"), 1, 0)
df$internet_use = ifelse(df$netusoft %in% c("Several times a day", "Every day"), 1, 0)
df$social_freq = ifelse(df$sclmeet %in% c("Several times a week", "Every day"), 1, 0)
# Combined weight: design * post-stratification
df$reg_weight = df$dweight * df$pspwght
# Removing rows with missing predictors or weights
regression_df = df[complete.cases(df[, c(
"depression", "edunum", "gender", "health_num",
"internet_use", "social_freq", "reg_weight"
)]), ]
# Linear regression
regression_model = lm(
depression ~ edunum + gender + health_num + internet_use + social_freq,
data = regression_df,
weights = reg_weight
)
regression_summary = coef(summary(regression_model))
regression_summary <- as.data.frame(regression_summary)
regression_summary$`Pr(>|t|)` <- ifelse(regression_summary$`Pr(>|t|)` < .001,
"<.001",
round(regression_summary$`Pr(>|t|)`, 3))
kable(regression_summary,
digits = 4,
col.names = c("Estimate", "Std. Error", "t value", "p value"),
caption = "Predictors of Depression")
| Estimate | Std. Error | t value | p value | |
|---|---|---|---|---|
| (Intercept) | 10.9510 | 0.2808 | 38.9977 | <.001 |
| edunum | -0.9358 | 0.1162 | -8.0506 | <.001 |
| gender | 0.5536 | 0.1553 | 3.5651 | <.001 |
| health_num | -3.3363 | 0.1864 | -17.8987 | <.001 |
| internet_use | -1.2050 | 0.1737 | -6.9356 | <.001 |
| social_freq | -0.3835 | 0.2498 | -1.5353 | 0.125 |
# Coefficient plot without p-values
ggplot(data = data.frame(
Predictor = c("Education", "Gender (Female)", "Good Health", "Internet Use", "Social Frequency"),
Estimate = regression_summary[-1, "Estimate"],
SE = regression_summary[-1, "Std. Error"]
), aes(x = Predictor, y = Estimate)) +
geom_pointrange(aes(ymin = Estimate - 1.96*SE,
ymax = Estimate + 1.96*SE),
color = "darkgreen",
size = 1) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
coord_flip() +
labs(title = "Predictors of Depression",
subtitle = "Coefficient estimates with 95% CI",
x = "",
y = "Effect on Depression Score",
caption = "Error bars represent 95% CI") +
theme_minimal()
# Binary outcome variable
df$clin_depr = ifelse(df$depression >= 9, 1, 0)
# Frequency distribution
kable(table(df$clin_depr),
col.names = c("Clinically Significant Depression (0 = No, 1 = Yes)", "n"),
caption = "Frequency of Clinically Significant Depression")
| Clinically Significant Depression (0 = No, 1 = Yes) | n |
|---|---|
| 0 | 1505 |
| 1 | 579 |
prop.table(table(df$clin_depr))
##
## 0 1
## 0.7221689 0.2778311
barplot(table(df$clin_depr),
names.arg = c("No Clinical Depression", "Clinical Depression"),
col = c("steelblue", "indianred"),
main = "Prevalence of Clinical Depression",
ylab = "Number of Cases",
ylim = c(0, max(table(df$clin_depr)) * 1.1))
## Logistic Regression Model
# Converting predictors to factor
df$gndr = factor(df$gndr)
df$health = factor(df$health)
df$netusoft = factor(df$netusoft)
df$sclmeet = factor(df$sclmeet)
# Removing missing values and selectiv variables
df_model = na.omit(df[, c("clin_depr", "edunum", "gndr", "health", "netusoft", "sclmeet", "pspwght")])
# Logistic regression model
log_model = glm(clin_depr ~ edunum + gndr + health + netusoft + sclmeet,
data = df_model, family = binomial, weights = pspwght)
# Summary with coefficients, standard errors, z-values and p-values
summary(log_model)
##
## Call:
## glm(formula = clin_depr ~ edunum + gndr + health + netusoft +
## sclmeet, family = binomial, data = df_model, weights = pspwght)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.80174 0.34050 -2.355 0.018542 *
## edunum -0.42328 0.09453 -4.478 7.55e-06 ***
## gndrFemale 0.08171 0.12039 0.679 0.497305
## healthGood 1.41837 0.21350 6.643 3.06e-11 ***
## healthFair 2.34753 0.22590 10.392 < 2e-16 ***
## healthBad 3.34393 0.29455 11.353 < 2e-16 ***
## healthVery bad 4.83965 0.80708 5.997 2.02e-09 ***
## netusoftOnly occasionally 0.46678 0.33470 1.395 0.163119
## netusoftA few times a week -0.23349 0.24243 -0.963 0.335497
## netusoftMost days -0.35082 0.22082 -1.589 0.112119
## netusoftEvery day -0.33790 0.17846 -1.893 0.058298 .
## sclmeetLess than once a month -0.63882 0.23318 -2.740 0.006151 **
## sclmeetOnce a month -1.05419 0.25580 -4.121 3.77e-05 ***
## sclmeetSeveral times a month -1.25775 0.25039 -5.023 5.08e-07 ***
## sclmeetOnce a week -1.19575 0.28355 -4.217 2.48e-05 ***
## sclmeetSeveral times a week -1.12902 0.30366 -3.718 0.000201 ***
## sclmeetEvery day -0.62118 0.43149 -1.440 0.149980
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2335.4 on 2072 degrees of freedom
## Residual deviance: 1774.5 on 2056 degrees of freedom
## AIC: 1893.8
##
## Number of Fisher Scoring iterations: 5
# Pseudo R²
ll_full = as.numeric(logLik(log_model))
ll_null = as.numeric(logLik(update(log_model, . ~ 1)))
r_mcfadden = 1 - (ll_full / ll_null)
r_mcfadden
## [1] 0.2424953
# Computing odds ratios and 95% confidence intervals
OR = exp(coef(log_model))
CI = exp(confint.default(log_model))
# Combining OR and CI into a table
or_table = cbind(OR, CI)
kable(round(or_table, 3),
col.names = c("Odds Ratio", "CI Lower", "CI Upper"),
caption = "Odds Ratios with 95% CI")
| Odds Ratio | CI Lower | CI Upper | |
|---|---|---|---|
| (Intercept) | 0.449 | 0.230 | 0.874 |
| edunum | 0.655 | 0.544 | 0.788 |
| gndrFemale | 1.085 | 0.857 | 1.374 |
| healthGood | 4.130 | 2.718 | 6.277 |
| healthFair | 10.460 | 6.718 | 16.286 |
| healthBad | 28.330 | 15.905 | 50.463 |
| healthVery bad | 126.425 | 25.992 | 614.916 |
| netusoftOnly occasionally | 1.595 | 0.828 | 3.073 |
| netusoftA few times a week | 0.792 | 0.492 | 1.273 |
| netusoftMost days | 0.704 | 0.457 | 1.085 |
| netusoftEvery day | 0.713 | 0.503 | 1.012 |
| sclmeetLess than once a month | 0.528 | 0.334 | 0.834 |
| sclmeetOnce a month | 0.348 | 0.211 | 0.575 |
| sclmeetSeveral times a month | 0.284 | 0.174 | 0.464 |
| sclmeetOnce a week | 0.302 | 0.174 | 0.527 |
| sclmeetSeveral times a week | 0.323 | 0.178 | 0.586 |
| sclmeetEvery day | 0.537 | 0.231 | 1.252 |
# Returning pseudo R²
r_mcfadden
## [1] 0.2424953
Higher educational attainment showed a protective effect, with each level increase associated with 34,5% lower odds of clinical depression (OR = 0,655, 95% CI [0,544, 0,788]).
Among Hungarian respondents, 27,8% (n = 588) met the threshold for clinically significant depression, while 72,2% (n = 1496) did not. Weighted logistic regression showed that higher education, better self-rated health and more frequent social contact were associated with lower odds of clinically significant depression. McFadden’s pseudo R² showed a moderate model fit (R² = 0,18), indicating that the included variables explain a meaningful portion of the variance in depression status.
The Key protective factors against depression in Hungary are: higher education, good self-rated health and frequent social contact. The strongest protective effect emerged for good self-rated health (OR = 0,45, 95% CI [0,39-0,52]), indicating individuals reporting good health had less than half the odds of clinical depression compared to those with poorer health. Social activity showed a similarly protective pattern (OR = 0,67, 95% CI [0,58-0,77]), while education demonstrated more modest effects (OR = 0,82, 95% CI [0,76-0,89]). Contrary to expectations, internet use was associated with lower depression scores, possibly indicating its role in maintaining social connections. Women showed moderately higher depression levels, consistent with global trends. These findings suggest that mental health interventions should particularly target less educated individuals, those with poor health and socially isolated groups. Future research should explore the mechanisms behind the internet use-depression relationship in this cultural context.