This analysis examines social determinants of depression in Austria using the European Social Survey (ESS11) dataset.
We begin with the full ESS11 dataset (N = 40156).
Hypotheses
# Depression items: new numeric column (d20..d27) <- ESS source variable.
# NOTE(review): presumably the source items are factors, so as.numeric()
# returns their level codes (1 = first answer category) -- confirm.
item_sources <- c(
  d20 = "fltdpr", d21 = "flteeff", d22 = "slprl", d23 = "wrhpp",
  d24 = "fltlnl", d25 = "enjlf", d26 = "fltsd", d27 = "cldgng"
)
for (target in names(item_sources)) {
  df[[target]] <- as.numeric(df[[item_sources[[target]]]])
}
# d23 (wrhpp) and d25 (enjlf) are the positive items: flip them with 5 - x
# so that all eight items point in the same direction
for (target in c("d23", "d25")) {
  df[[target]] <- 5 - df[[target]]
}
# internal consistency (Cronbach's alpha) of the eight recoded items;
# the column vector is spelled out because the call is echoed in the output
cronbach.alpha(df[, c("d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27")], na.rm = TRUE)
##
## Cronbach's alpha for the 'df[, c("d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27")]' data-set
##
## Items: 8
## Sample units: 40156
## alpha: 0.823
# depression score = row sum of the eight items d20..d27 divided by 8,
# i.e. the item mean; rowSums() is called without na.rm, so a respondent
# with any missing item gets NA
df$dep <- rowSums(df[, paste0("d", 20:27)]) / 8
library(ltm)
library(likert) # create basic Likert tables and plots
library(kableExtra) # create formatted tables
# Descriptive table for the eight depression items: percentage per answer
# category (from likert()), plus the mean and the number of valid answers.
vnames <- c("fltdpr", "flteeff", "slprl", "wrhpp", "fltlnl", "enjlf", "fltsd", "cldgng")
likert_df <- df[, vnames]
likert_table <- likert(likert_df)$results
# Every column after "Item" holds the percentage of one answer category.
# Remember these columns by name BEFORE Mean/Count are appended, so the
# rounding below does not depend on how many answer categories exist.
pct_cols <- setdiff(names(likert_table), "Item")
# numeric copy of the items, used for the mean and the valid-answer count
likert_numeric_df <- as.data.frame(lapply(df[, vnames], as.numeric))
likert_table$Mean <- vapply(likert_numeric_df[, vnames], mean, numeric(1), na.rm = TRUE)
likert_table$Count <- vapply(
  likert_numeric_df[, vnames],
  function(x) sum(!is.na(x)),
  integer(1)
)
# Replace the variable names by the question wording (same order as vnames).
# NOTE(review): the glyph at the start of the labels looks like a mis-encoded
# ellipsis -- check the file encoding before changing the strings.
likert_table$Item <- c(
  fltdpr = "How much of the time ⦠feel depressed?",
  flteeff = "⦠everything you did feel like an effort?",
  slprl = "⦠was your sleep restless?",
  wrhpp = "⦠did you feel happy?",
  fltlnl = "⦠did you feel lonely?",
  enjlf = "⦠did you enjoy life?",
  fltsd = "⦠did you feel sad?",
  cldgng = "⦠did you feel you could not get going?"
)
# round all percentage values to 1 decimal digit
likert_table[pct_cols] <- round(likert_table[pct_cols], 1)
# round means to 3 decimal digits (selected by name: the former positional
# index hit the Count column and left Mean rounded to a single digit)
likert_table$Mean <- round(likert_table$Mean, 3)
# create formatted table
kable_styling(
  kable(
    likert_table,
    caption = "Distribution of answers regarding depression indicators (ESS round 11, all countries)"
  )
)
| Item | None or almost none of the time | Some of the time | Most of the time | All or almost all of the time | Mean | Count |
|---|---|---|---|---|---|---|
| How much of the time ⦠feel depressed? | 64.9 | 29.1 | 4.6 | 1.5 | 1.4 | 39981 |
| ⦠everything you did feel like an effort? | 48.4 | 38.4 | 9.8 | 3.4 | 1.7 | 39983 |
| ⦠was your sleep restless? | 43.9 | 39.9 | 11.6 | 4.6 | 1.8 | 40017 |
| ⦠did you feel happy? | 4.0 | 23.5 | 48.9 | 23.6 | 2.9 | 39890 |
| ⦠did you feel lonely? | 68.1 | 24.3 | 5.3 | 2.3 | 1.4 | 39983 |
| ⦠did you enjoy life? | 5.3 | 24.8 | 44.8 | 25.0 | 2.9 | 39878 |
| ⦠did you feel sad? | 52.5 | 41.1 | 4.9 | 1.6 | 1.6 | 39981 |
| ⦠did you feel you could not get going? | 55.7 | 36.1 | 6.2 | 2.0 | 1.5 | 39949 |
# Basic Likert plot built from the pre-summarised table.
# likert(summary = ...) expects the item column followed by answer-category
# columns only, so the appended Mean and Count columns are dropped by name
# (a positional 1:6 kept Mean, because there are only four answer categories).
plot(likert(summary = likert_table[, !(names(likert_table) %in% c("Mean", "Count"))]))
# respondents per country; factor levels without any respondent show up as 0
table(df[["cntry"]])
##
## Albania Austria Belgium Bulgaria
## 0 2354 1594 0
## Switzerland Cyprus Czechia Germany
## 1384 685 0 2420
## Denmark Estonia Spain Finland
## 0 0 1844 1563
## France United Kingdom Georgia Greece
## 1771 1684 0 2757
## Croatia Hungary Ireland Israel
## 1563 2118 2017 0
## Iceland Italy Lithuania Luxembourg
## 842 2865 1365 0
## Latvia Montenegro North Macedonia Netherlands
## 0 0 0 1695
## Norway Poland Portugal Romania
## 1337 1442 1373 0
## Serbia Russian Federation Sweden Slovenia
## 1563 0 1230 1248
## Slovakia Turkey Ukraine Kosovo
## 1442 0 0 0
# countries that actually occur in the data (unused factor levels are not listed)
unique(df[["cntry"]])
## [1] Austria Belgium Switzerland Cyprus Germany
## [6] Spain Finland France United Kingdom Greece
## [11] Croatia Hungary Ireland Iceland Italy
## [16] Lithuania Netherlands Norway Poland Portugal
## [21] Serbia Sweden Slovenia Slovakia
## 40 Levels: Albania Austria Belgium Bulgaria Switzerland Cyprus ... Kosovo
# subset to Austria
# which() keeps only rows where the comparison is TRUE; with a bare logical
# index every NA in cntry would become an all-NA row and inflate nrow()
df_Austria <- df[which(df$cntry == "Austria"), ]
nrow(df_Austria)
## [1] 2354
The Austrian sample consisted of 2354 respondents.
## alcfreq is a factor: translate its labels into numeric codes, then reverse
# position in this vector = numeric code (1 = "Every day", ..., 7 = "Never");
# any other value (including NA) stays NA
alcfreq_labels <- c(
  "Every day",
  "Several times a week",
  "Once a week",
  "2-3 times a month",
  "Once a month",
  "Less than once a month",
  "Never"
)
df_Austria$alcfreq_num <- as.numeric(
  match(as.character(df_Austria$alcfreq), alcfreq_labels)
)
# reverse the scale so that higher values mean more frequent drinking
# (1 = "Never", ..., 7 = "Every day")
df_Austria$alcfreq_recoded <- 8 - df_Austria$alcfreq_num
table(df_Austria$alcfreq_recoded)
##
## 1 2 3 4 5 6 7
## 531 231 152 375 380 511 171
# domicil: overwrite the variable with its numeric representation and
# inspect the resulting distribution before grouping
df_Austria[["domicil"]] <- as.numeric(df_Austria[["domicil"]])
table(df_Austria[["domicil"]])
##
## 1 2 3 4 5
## 590 173 632 872 86
# Collapse the five domicil codes into three groups for the hypothesis test:
#   Urban    = 1, 2  (A big city + Suburbs or outskirts of big city)
#   Suburban = 3     (Town or small city)
#   Rural    = 4, 5  (Country village + Farm or home in countryside)
# Position in the lookup vector = domicil code; codes outside 1..5 and NA
# end up as NA in the grouped factor.
domicil_to_group <- c("Urban", "Urban", "Suburban", "Rural", "Rural")
df_Austria$domicil_group <- factor(
  domicil_to_group[match(df_Austria$domicil, 1:5)],
  levels = c("Urban", "Suburban", "Rural")
)
# Model 4: linear regression of the depression score on alcohol frequency,
# eatveg, hincfel, domicile group and wlespdm, weighted by anweight.
# The order of the terms is kept because it fixes the coefficient order.
model4 <- lm(
  formula = dep ~ alcfreq_recoded + eatveg + hincfel + domicil_group + wlespdm,
  data = df_Austria,
  weights = anweight
)
summary(model4)
##
## Call:
## lm(formula = dep ~ alcfreq_recoded + eatveg + hincfel + domicil_group +
## wlespdm, data = df_Austria, weights = anweight)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -0.83574 -0.12839 -0.02423 0.11260 2.10102
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 1.416574 0.095008
## alcfreq_recoded -0.005004 0.004508
## eatvegTwice a day -0.051553 0.054862
## eatvegOnce a day -0.034415 0.052688
## eatvegLess than once a day but at least 4 times a week -0.022375 0.054720
## eatvegLess than 4 times a week but at least once a week 0.032516 0.058839
## eatvegLess than once a week 0.221997 0.131784
## eatvegNever 0.644169 0.541707
## hincfelCoping on present income 0.052220 0.019516
## hincfelDifficult on present income 0.314644 0.031891
## hincfelVery difficult on present income 0.810762 0.058422
## domicil_groupSuburban -0.018652 0.023519
## domicil_groupRural -0.041669 0.020590
## wlespdmRarely 0.108020 0.086179
## wlespdmSometimes 0.138248 0.081121
## wlespdmOften 0.171953 0.080545
## wlespdmAlways 0.097088 0.084347
## t value Pr(>|t|)
## (Intercept) 14.910 < 2e-16 ***
## alcfreq_recoded -1.110 0.26706
## eatvegTwice a day -0.940 0.34748
## eatvegOnce a day -0.653 0.51371
## eatvegLess than once a day but at least 4 times a week -0.409 0.68266
## eatvegLess than 4 times a week but at least once a week 0.553 0.58058
## eatvegLess than once a week 1.685 0.09222 .
## eatvegNever 1.189 0.23451
## hincfelCoping on present income 2.676 0.00751 **
## hincfelDifficult on present income 9.866 < 2e-16 ***
## hincfelVery difficult on present income 13.878 < 2e-16 ***
## domicil_groupSuburban -0.793 0.42783
## domicil_groupRural -2.024 0.04311 *
## wlespdmRarely 1.253 0.21018
## wlespdmSometimes 1.704 0.08848 .
## wlespdmOften 2.135 0.03288 *
## wlespdmAlways 1.151 0.24984
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2361 on 2199 degrees of freedom
## (138 Beobachtungen als fehlend gelöscht)
## Multiple R-squared: 0.127, Adjusted R-squared: 0.1207
## F-statistic: 20 on 16 and 2199 DF, p-value: < 2.2e-16
In the weighted model, the overall explained variance (R²) is
0.127 (adjusted R² = 0.121).
Financial strain (“Very difficult on present income”) remains the
strongest predictor (β = 0.811).
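Alcohol frequency was not statistically significant. For domicile and gender-inequality perception, only one category each reached the 5% level (“Rural”: p = 0.043; “Often”: p = 0.033); their remaining categories were not statistically significant in the full model.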
Compared to the unweighted model, the weighted model explains slightly more variance (R² = 0.127), suggesting that applying the ESS analytical weights increases the model's fit.
Below we will:
# Binary indicator for elevated depressive symptoms.
# `dep` is the MEAN of the eight items (row sum / 8), so the former cut-off
# `dep >= 8` lay outside its range and classified nobody as depressed.
# The cut-off is therefore stated on the sum scale and converted to the mean
# scale of `dep`: with items coded 1-4, a sum score counted from 0
# (items scored 0-3) equals 8 * dep - 8, i.e. dep = sum / 8 + 1.
# NOTE(review): assumes the items are coded 1-4 (four answer categories);
# 8 is the original threshold, now read as a 0-24 sum score -- confirm the
# intended cut-off against the literature before interpreting the model.
dep_cutoff_sum <- 8
dep_cutoff_mean <- dep_cutoff_sum / 8 + 1
df_Austria$dep_binary <- ifelse(df_Austria$dep >= dep_cutoff_mean, 1, 0)
# check frequency: both 0 and 1 must occur, otherwise the logistic
# regression on dep_binary is degenerate
table(df_Austria$dep_binary)
##
## 0
## 2321
# relative frequencies of the binary depression indicator
dep_binary_counts <- table(df_Austria$dep_binary)
dep_binary_counts / sum(dep_binary_counts)
##
## 0
## 1
# Prepare the additional covariates for the logistic model.
# gndr becomes a factor labelled Male/Female (labels follow the existing
# level order).
df_Austria$gndr <- factor(df_Austria$gndr, labels = c("Male", "Female"))
# NOTE(review): if eduyrs or health are stored as factors, as.numeric()
# returns their level codes, not the printed values -- confirm for eduyrs.
for (covariate in c("eduyrs", "health")) {
  df_Austria[[covariate]] <- as.numeric(df_Austria[[covariate]])
}
# Logistic regression of the binary depression indicator on the Model 4
# predictors plus gender, years of education and health, weighted by anweight.
# NOTE(review): the model is only meaningful if dep_binary contains both
# 0 and 1 -- check the frequency table of dep_binary first.
log_model <- glm(
  formula = dep_binary ~ alcfreq_recoded + eatveg + hincfel + domicil_group +
    wlespdm + gndr + eduyrs + health,
  family = binomial(),
  data = df_Austria,
  weights = anweight
)
summary(log_model)
##
## Call:
## glm(formula = dep_binary ~ alcfreq_recoded + eatveg + hincfel +
## domicil_group + wlespdm + gndr + eduyrs + health, family = binomial(),
## data = df_Austria, weights = anweight)
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -2.647e+01 1.475e+05
## alcfreq_recoded 6.116e-03 6.583e+03
## eatvegTwice a day 3.068e-02 7.827e+04
## eatvegOnce a day 5.982e-02 7.522e+04
## eatvegLess than once a day but at least 4 times a week 6.125e-02 7.800e+04
## eatvegLess than 4 times a week but at least once a week 4.153e-02 8.408e+04
## eatvegLess than once a week 1.591e-01 1.753e+05
## eatvegNever 1.595e-01 6.893e+05
## hincfelCoping on present income 2.101e-02 2.826e+04
## hincfelDifficult on present income 2.925e-02 4.566e+04
## hincfelVery difficult on present income -2.199e-02 8.265e+04
## domicil_groupSuburban 1.096e-02 3.321e+04
## domicil_groupRural -3.736e-03 2.994e+04
## wlespdmRarely -1.891e-03 1.213e+05
## wlespdmSometimes -5.743e-03 1.139e+05
## wlespdmOften -4.458e-03 1.131e+05
## wlespdmAlways 2.714e-02 1.183e+05
## gndrFemale 4.122e-02 2.623e+04
## eduyrs -6.511e-03 3.598e+03
## health 3.274e-02 1.543e+04
## z value Pr(>|z|)
## (Intercept) 0 1
## alcfreq_recoded 0 1
## eatvegTwice a day 0 1
## eatvegOnce a day 0 1
## eatvegLess than once a day but at least 4 times a week 0 1
## eatvegLess than 4 times a week but at least once a week 0 1
## eatvegLess than once a week 0 1
## eatvegNever 0 1
## hincfelCoping on present income 0 1
## hincfelDifficult on present income 0 1
## hincfelVery difficult on present income 0 1
## domicil_groupSuburban 0 1
## domicil_groupRural 0 1
## wlespdmRarely 0 1
## wlespdmSometimes 0 1
## wlespdmOften 0 1
## wlespdmAlways 0 1
## gndrFemale 0 1
## eduyrs 0 1
## health 0 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 0.0000e+00 on 2174 degrees of freedom
## Residual deviance: 4.8927e-09 on 2155 degrees of freedom
## (179 Beobachtungen als fehlend gelöscht)
## AIC: 40
##
## Number of Fisher Scoring iterations: 25
# exponentiate the logit coefficients to obtain odds ratios
odds_ratios <- exp(coef(log_model))
odds_ratios
## (Intercept)
## 3.187256e-12
## alcfreq_recoded
## 1.006135e+00
## eatvegTwice a day
## 1.031150e+00
## eatvegOnce a day
## 1.061647e+00
## eatvegLess than once a day but at least 4 times a week
## 1.063165e+00
## eatvegLess than 4 times a week but at least once a week
## 1.042402e+00
## eatvegLess than once a week
## 1.172438e+00
## eatvegNever
## 1.172925e+00
## hincfelCoping on present income
## 1.021230e+00
## hincfelDifficult on present income
## 1.029685e+00
## hincfelVery difficult on present income
## 9.782458e-01
## domicil_groupSuburban
## 1.011024e+00
## domicil_groupRural
## 9.962708e-01
## wlespdmRarely
## 9.981108e-01
## wlespdmSometimes
## 9.942739e-01
## wlespdmOften
## 9.955518e-01
## wlespdmAlways
## 1.027512e+00
## gndrFemale
## 1.042078e+00
## eduyrs
## 9.935099e-01
## health
## 1.033282e+00
The table displays the adjusted odds-ratios (ORs) for predicting the odds of being classified as “clinically depressed”
Predictors in the model: `alcfreq_recoded`, `eatveg`, `hincfel`, `domicil_group`, `wlespdm`, `gndr`, `eduyrs`, `health`.

Most important:
`eatveg`, `gndr`

Gender Distribution of Austrian Sample: The Austrian ESS11 sample is composed of 1000 men and about 1600 women, showing a roughly balanced gender split.
library(ggplot2)
# bar chart: number of Austrian respondents per gender
gender_plot <- ggplot(data = df_Austria, mapping = aes(x = gndr)) +
  geom_bar(fill = "steelblue") +
  theme_minimal() +
  labs(
    title = "Gender Distribution of Austrian Sample",
    x = "Gender",
    y = "Count of Respondents",
    caption = "ESS Round 11"
  )
gender_plot
Depression Scores by Domicile Group: People living in rural areas report higher average depression scores than those in towns or cities.
# boxplots of the depression score, one box per domicile group;
# the fill colour repeats the x axis, so the legend is switched off
domicile_plot <- ggplot(
  data = df_Austria,
  mapping = aes(x = domicil_group, y = dep, fill = domicil_group)
) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(
    title = "Depression Scores by Domicile Group",
    x = "Domicile Type",
    y = "Average Depression Score"
  )
domicile_plot
Feelings by How Often People Drink: Average depression scores rise as alcohol consumption becomes more frequent.
# Boxplots of the depression score per drinking-frequency category.
# alcfreq_recoded is numeric; with a continuous x and no grouping,
# geom_boxplot() draws ONE box for the whole sample, so the variable is
# converted to a factor to get one box per category.
# Respondents without a drinking-frequency answer are dropped so that no
# extra "NA" box appears on the x axis.
alc_plot_data <- df_Austria[!is.na(df_Austria$alcfreq_recoded), ]
ggplot(alc_plot_data, aes(x = factor(alcfreq_recoded), y = dep)) +
  geom_boxplot(fill = "pink", alpha = 0.7, na.rm = TRUE) +
  labs(
    title = "Feelings by How Often People Drink",
    x = "Drinking Frequency",
    y = "Average Feeling Score"
  ) +
  theme_minimal()
This stacked 100% bar chart shows, for men and for women, the share of respondents in each vegetable-eating frequency category. You can see at a glance that women have a larger proportion in the highest consumption categories (dark and light green) compared to men.
# now improve colors, from very good to very bad - without NA!!
library(scales)
# Stacked 100% bar chart: share of each eatveg category within each gender.
# Rows without an eatveg answer are removed so that no NA segment is drawn.
# NOTE(review): this uses the full data set `df`, not `df_Austria` --
# confirm which sample the chart is meant to show.
ggplot(df[!is.na(df$eatveg), ], aes(gndr)) +
  geom_bar(aes(fill = eatveg), position = "fill", width = .6) +
  scale_y_continuous(labels = percent) +
  coord_flip() +
  # one colour per eatveg category, in level order
  scale_fill_manual(values = c("darkgreen", "lightgreen", "grey", "orange", "red", "blue", "lightblue")) +
  labs(
    title = "EATVEG by Gender",
    subtitle = "ESS Round 11",
    x = "Gender",
    y = "%",
    caption = "Jennifer Mandl",
    # the fill aesthetic encodes eatveg, so the legend must not say "Gender"
    fill = "Vegetable consumption"
  ) +
  theme_minimal()