# Introduction
This analysis examines social determinants of depression in Austria using the European Social Survey (ESS11) dataset.
We begin with the full ESS11 dataset (N = 40156).
Hypotheses
# Depression items: copy the eight survey items into numeric columns d20-d27.
# as.numeric() on a factor yields the level codes, so each answer category
# becomes its position in the factor's level order.
df[paste0("d", 20:27)] <- lapply(
  df[c("fltdpr", "flteeff", "slprl", "wrhpp", "fltlnl", "enjlf", "fltsd", "cldgng")],
  as.numeric
)
# The two positively worded items (wrhpp -> d23, enjlf -> d25) are mirrored
# with 5 - x so that, like the other six items, a higher value means more
# depressive symptoms.
# NOTE(review): 5 - x assumes the codes run 1-4 -- confirm the factor levels.
df[c("d23", "d25")] <- 5 - df[c("d23", "d25")]
# Internal consistency of the eight-item scale (Cronbach's alpha, ltm package)
cronbach.alpha(
  df[, c("d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27")],
  na.rm = TRUE
)
##
## Cronbach's alpha for the 'df[, c("d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27")]' data-set
##
## Items: 8
## Sample units: 40156
## alpha: 0.823
# Depression score: mean of the eight numeric items d20-d27.
# rowSums() is called without na.rm, so a respondent with any missing item
# gets dep = NA.
df$dep <- rowSums(df[, paste0("d", 20:27)]) / 8
library(ltm)
library(likert) # create basic Likert tables and plots
library(kableExtra) # create formatted tables
# The eight depression items, in the order they should appear in the table
vnames <- c("fltdpr", "flteeff", "slprl", "wrhpp", "fltlnl", "enjlf", "fltsd", "cldgng")
likert_df <- df[, vnames]
# Percentage of answers per category, one row per item
likert_table <- likert(likert_df)$results
# Numeric copy of the same items, used for the per-item mean and valid-N
likert_numeric_df <- as.data.frame(lapply(likert_df, as.numeric))
likert_table$Mean <- vapply(
  likert_numeric_df[, vnames], mean, numeric(1),
  na.rm = TRUE
)
likert_table$Count <- vapply(
  likert_numeric_df[, vnames],
  function(item) sum(!is.na(item)),
  integer(1)
)
# Replace the variable names with readable question wording.
# The labels are assigned by position, so they must stay in the same order
# as vnames.
likert_table$Item <- c(
  fltdpr = "How much of the time … feel depressed?",
  flteeff = "… everything you did feel like an effort?",
  slprl = "… was your sleep restless?",
  wrhpp = "… did you feel happy?",
  fltlnl = "… did you feel lonely?",
  enjlf = "… did you enjoy life?",
  fltsd = "… did you feel sad?",
  cldgng = "… did you feel you could not get going?"
)
# Select the percentage columns by name rather than by position: they are
# every column except the item label and the appended Mean / Count columns.
# Fixed positions (2:6 and 7) only fit a five-category scale; with four
# answer categories they round Mean to 1 digit and apply the 3-digit
# rounding to Count instead.
pct_cols <- setdiff(names(likert_table), c("Item", "Mean", "Count"))
# round all percentage values to 1 decimal digit
likert_table[pct_cols] <- round(likert_table[pct_cols], 1)
# round means to 3 decimal digits
likert_table$Mean <- round(likert_table$Mean, 3)
# create formatted table
kable_styling(
  kable(
    likert_table,
    caption = "Distribution of answers regarding depression indicators (ESS round 11, all countries)"
  )
)
| Item | None or almost none of the time | Some of the time | Most of the time | All or almost all of the time | Mean | Count |
|---|---|---|---|---|---|---|
| How much of the time … feel depressed? | 64.9 | 29.1 | 4.6 | 1.5 | 1.4 | 39981 |
| … everything you did feel like an effort? | 48.4 | 38.4 | 9.8 | 3.4 | 1.7 | 39983 |
| … was your sleep restless? | 43.9 | 39.9 | 11.6 | 4.6 | 1.8 | 40017 |
| … did you feel happy? | 4.0 | 23.5 | 48.9 | 23.6 | 2.9 | 39890 |
| … did you feel lonely? | 68.1 | 24.3 | 5.3 | 2.3 | 1.4 | 39983 |
| … did you enjoy life? | 5.3 | 24.8 | 44.8 | 25.0 | 2.9 | 39878 |
| … did you feel sad? | 52.5 | 41.1 | 4.9 | 1.6 | 1.6 | 39981 |
| … did you feel you could not get going? | 55.7 | 36.1 | 6.2 | 2.0 | 1.5 | 39949 |
# create basic plot (code also valid)
# likert(summary = ...) expects the item label followed by the answer
# percentages only, so Mean and Count are dropped by name. A fixed 1:6
# selection would keep Mean whenever there are four answer categories and
# plot it as if it were a response level.
plot(likert(summary = likert_table[, !(names(likert_table) %in% c("Mean", "Count"))]))
# Number of respondents per country (factor levels without data show as 0)
table(df$cntry)
##
## Albania Austria Belgium Bulgaria
## 0 2354 1594 0
## Switzerland Cyprus Czechia Germany
## 1384 685 0 2420
## Denmark Estonia Spain Finland
## 0 0 1844 1563
## France United Kingdom Georgia Greece
## 1771 1684 0 2757
## Croatia Hungary Ireland Israel
## 1563 2118 2017 0
## Iceland Italy Lithuania Luxembourg
## 842 2865 1365 0
## Latvia Montenegro North Macedonia Netherlands
## 0 0 0 1695
## Norway Poland Portugal Romania
## 1337 1442 1373 0
## Serbia Russian Federation Sweden Slovenia
## 1563 0 1230 1248
## Slovakia Turkey Ukraine Kosovo
## 1442 0 0 0
# Countries that actually occur in the data. The result is still a factor, so
# its level set keeps the countries that have no rows.
unique(df$cntry)
## [1] Austria Belgium Switzerland Cyprus Germany
## [6] Spain Finland France United Kingdom Greece
## [11] Croatia Hungary Ireland Iceland Italy
## [16] Lithuania Netherlands Norway Poland Portugal
## [21] Serbia Sweden Slovenia Slovakia
## 40 Levels: Albania Austria Belgium Bulgaria Switzerland Cyprus ... Kosovo
# subset to Austria
# %in% never returns NA, so respondents with a missing country code are
# simply excluded; with `==` each NA would add an all-NA row to the subset.
df_Austria <- df[df$cntry %in% "Austria", ]
nrow(df_Austria)
## [1] 2354
The Austrian sample consisted of 2354 respondents.
## alcfreq is a factor: map each answer label to a number, 1 = "Every day"
## through 7 = "Never". Labels not in the list (and missing answers) become NA.
df_Austria$alcfreq_num <- as.numeric(match(
  as.character(df_Austria$alcfreq),
  c(
    "Every day",
    "Several times a week",
    "Once a week",
    "2-3 times a month",
    "Once a month",
    "Less than once a month",
    "Never"
  )
))
# Reverse the scale so that higher values mean more frequent drinking
# (7 = "Every day", 1 = "Never")
df_Austria$alcfreq_recoded <- 8 - df_Austria$alcfreq_num
table(df_Austria$alcfreq_recoded)
##
## 1 2 3 4 5 6 7
## 531 231 152 375 380 511 171
# Replace domicil with its numeric codes (overwrites the original column)
# NOTE(review): presumably domicil is a factor, in which case these are the
# level positions -- confirm they match the category order used for grouping.
df_Austria[["domicil"]] <- as.numeric(df_Austria[["domicil"]])
table(df_Austria[["domicil"]])
##
## 1 2 3 4 5
## 590 173 632 872 86
# Collapse the five domicil codes into three settlement types for the
# hypothesis test:
#   Urban    = codes 1 + 2 (A big city + Suburbs or outskirts of big city)
#   Suburban = code 3      (Town or small city)
#   Rural    = codes 4 + 5 (Country village + Farm or home in countryside)
# The lookup vector is indexed by the domicil code; NA or any code outside
# 1-5 yields NA. "Urban" is the first level and therefore the reference
# category in regression models.
df_Austria$domicil_group <- factor(
  c("Urban", "Urban", "Suburban", "Rural", "Rural")[df_Austria$domicil],
  levels = c("Urban", "Suburban", "Rural")
)
# Model 4: weighted linear regression of the depression score on alcohol
# frequency, vegetable consumption, feeling about household income,
# settlement type and wlespdm; anweight supplies the case weights.
model4 <- lm(
  dep ~ alcfreq_recoded + eatveg + hincfel + domicil_group + wlespdm,
  data = df_Austria,
  weights = anweight
)
summary(model4)
##
## Call:
## lm(formula = dep ~ alcfreq_recoded + eatveg + hincfel + domicil_group +
## wlespdm, data = df_Austria, weights = anweight)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -0.83574 -0.12839 -0.02423 0.11260 2.10102
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 1.416574 0.095008
## alcfreq_recoded -0.005004 0.004508
## eatvegTwice a day -0.051553 0.054862
## eatvegOnce a day -0.034415 0.052688
## eatvegLess than once a day but at least 4 times a week -0.022375 0.054720
## eatvegLess than 4 times a week but at least once a week 0.032516 0.058839
## eatvegLess than once a week 0.221997 0.131784
## eatvegNever 0.644169 0.541707
## hincfelCoping on present income 0.052220 0.019516
## hincfelDifficult on present income 0.314644 0.031891
## hincfelVery difficult on present income 0.810762 0.058422
## domicil_groupSuburban -0.018652 0.023519
## domicil_groupRural -0.041669 0.020590
## wlespdmRarely 0.108020 0.086179
## wlespdmSometimes 0.138248 0.081121
## wlespdmOften 0.171953 0.080545
## wlespdmAlways 0.097088 0.084347
## t value Pr(>|t|)
## (Intercept) 14.910 < 2e-16 ***
## alcfreq_recoded -1.110 0.26706
## eatvegTwice a day -0.940 0.34748
## eatvegOnce a day -0.653 0.51371
## eatvegLess than once a day but at least 4 times a week -0.409 0.68266
## eatvegLess than 4 times a week but at least once a week 0.553 0.58058
## eatvegLess than once a week 1.685 0.09222 .
## eatvegNever 1.189 0.23451
## hincfelCoping on present income 2.676 0.00751 **
## hincfelDifficult on present income 9.866 < 2e-16 ***
## hincfelVery difficult on present income 13.878 < 2e-16 ***
## domicil_groupSuburban -0.793 0.42783
## domicil_groupRural -2.024 0.04311 *
## wlespdmRarely 1.253 0.21018
## wlespdmSometimes 1.704 0.08848 .
## wlespdmOften 2.135 0.03288 *
## wlespdmAlways 1.151 0.24984
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2361 on 2199 degrees of freedom
## (138 observations deleted due to missingness)
## Multiple R-squared: 0.127, Adjusted R-squared: 0.1207
## F-statistic: 20 on 16 and 2199 DF, p-value: < 2.2e-16
In the weighted model, the overall explained variance (R²) is 0.127 (adjusted R² = 0.121).
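Financial strain (“Very difficult on present income”) remains the strongest predictor (b = 0.811, p < .001); the other two income categories are also significant. Alcohol frequency and vegetable consumption were not statistically significant in the full model. For domicile, only the rural category differed significantly from urban (b = −0.042, p = .043), and for gender‐inequality perception (wlespdm) only the “Often” category reached significance (b = 0.172, p = .033).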
Below we will:
## Predictors of Clinically Significant Depression
# Binary indicator: clinically significant depression if average item score >= 2.5
# dep is a mean of eight items, so the cut-off has to lie on the scale of a
# single item. A cut-off of 8 is out of reach for a mean of 1-4 items and
# classifies every respondent as 0.
# NOTE(review): confirm that 2.5 is the intended clinical cut-off.
df_Austria$dep_binary <- ifelse(df_Austria$dep >= 2.5, 1, 0)
# Check frequency (respondents with dep = NA stay NA and are not tabulated)
table(df_Austria$dep_binary)
##
## 0
## 2321
# Share of respondents in each dep_binary category; table() leaves out NA by
# default, so the proportions are based on non-missing cases only.
prop.table(table(df_Austria$dep_binary)) # relative frequencies
##
## 0
## 1
# Prepare the additional covariates for the logistic model
# gndr: categorical, relabelled as Male / Female in level order
df_Austria$gndr <- factor(df_Austria$gndr, labels = c("Male", "Female"))
# NOTE(review): if eduyrs or health are stored as factors, as.numeric()
# returns the level positions rather than the printed values -- confirm that
# this is the intended coding.
df_Austria$eduyrs <- as.numeric(df_Austria$eduyrs) # education (numeric)
df_Austria$health <- as.numeric(df_Austria$health) # self-rated health
# Weighted logistic regression of the binary depression indicator on the
# Model 4 predictors plus gender, education and self-rated health
log_model <- glm(
  dep_binary ~ alcfreq_recoded + eatveg + hincfel + domicil_group +
    wlespdm + gndr + eduyrs + health,
  data = df_Austria,
  family = binomial(),
  weights = anweight
)
summary(log_model)
##
## Call:
## glm(formula = dep_binary ~ alcfreq_recoded + eatveg + hincfel +
## domicil_group + wlespdm + gndr + eduyrs + health, family = binomial(),
## data = df_Austria, weights = anweight)
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -2.647e+01 1.475e+05
## alcfreq_recoded 6.116e-03 6.583e+03
## eatvegTwice a day 3.068e-02 7.827e+04
## eatvegOnce a day 5.982e-02 7.522e+04
## eatvegLess than once a day but at least 4 times a week 6.125e-02 7.800e+04
## eatvegLess than 4 times a week but at least once a week 4.153e-02 8.408e+04
## eatvegLess than once a week 1.591e-01 1.753e+05
## eatvegNever 1.595e-01 6.893e+05
## hincfelCoping on present income 2.101e-02 2.826e+04
## hincfelDifficult on present income 2.925e-02 4.566e+04
## hincfelVery difficult on present income -2.199e-02 8.265e+04
## domicil_groupSuburban 1.096e-02 3.321e+04
## domicil_groupRural -3.736e-03 2.994e+04
## wlespdmRarely -1.891e-03 1.213e+05
## wlespdmSometimes -5.743e-03 1.139e+05
## wlespdmOften -4.458e-03 1.131e+05
## wlespdmAlways 2.714e-02 1.183e+05
## gndrFemale 4.122e-02 2.623e+04
## eduyrs -6.511e-03 3.598e+03
## health 3.274e-02 1.543e+04
## z value Pr(>|z|)
## (Intercept) 0 1
## alcfreq_recoded 0 1
## eatvegTwice a day 0 1
## eatvegOnce a day 0 1
## eatvegLess than once a day but at least 4 times a week 0 1
## eatvegLess than 4 times a week but at least once a week 0 1
## eatvegLess than once a week 0 1
## eatvegNever 0 1
## hincfelCoping on present income 0 1
## hincfelDifficult on present income 0 1
## hincfelVery difficult on present income 0 1
## domicil_groupSuburban 0 1
## domicil_groupRural 0 1
## wlespdmRarely 0 1
## wlespdmSometimes 0 1
## wlespdmOften 0 1
## wlespdmAlways 0 1
## gndrFemale 0 1
## eduyrs 0 1
## health 0 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 0.0000e+00 on 2174 degrees of freedom
## Residual deviance: 4.8927e-09 on 2155 degrees of freedom
## (179 observations deleted due to missingness)
## AIC: 40
##
## Number of Fisher Scoring iterations: 25
# Exponentiate the logistic-regression coefficients (log-odds) to obtain
# odds ratios
exp(coef(log_model))
## (Intercept)
## 3.187256e-12
## alcfreq_recoded
## 1.006135e+00
## eatvegTwice a day
## 1.031150e+00
## eatvegOnce a day
## 1.061647e+00
## eatvegLess than once a day but at least 4 times a week
## 1.063165e+00
## eatvegLess than 4 times a week but at least once a week
## 1.042402e+00
## eatvegLess than once a week
## 1.172438e+00
## eatvegNever
## 1.172925e+00
## hincfelCoping on present income
## 1.021230e+00
## hincfelDifficult on present income
## 1.029685e+00
## hincfelVery difficult on present income
## 9.782458e-01
## domicil_groupSuburban
## 1.011024e+00
## domicil_groupRural
## 9.962708e-01
## wlespdmRarely
## 9.981108e-01
## wlespdmSometimes
## 9.942739e-01
## wlespdmOften
## 9.955518e-01
## wlespdmAlways
## 1.027512e+00
## gndrFemale
## 1.042078e+00
## eduyrs
## 9.935099e-01
## health
## 1.033282e+00
The table displays the adjusted odds‐ratios (ORs) for predicting the odds of being classified as “clinically depressed”
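Predictors in the model: alcohol frequency (alcfreq_recoded), vegetable consumption (eatveg), feeling about household income (hincfel), domicile (domicil_group), gender‐inequality perception (wlespdm), gender (gndr), years of education (eduyrs), and self‐rated health (health).
Most important: vegetable consumption (eatveg) and gender (gndr).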