library(foreign)
library(ltm)
## Loading required package: MASS
## Loading required package: msm
## Loading required package: polycor
library(likert)
## Loading required package: ggplot2
## Loading required package: xtable
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
df = read.spss("C:/Users/majam/Downloads/ESS11(1).sav", to.data.frame = T)

Introduction

Depression is a prevalent mental health condition influenced by a complex interplay of social, economic, and environmental factors (Ali & Kassem, 2021). Understanding these determinants is essential to design effective interventions and policies. This analysis employs the CES-D8 Depression Scale to operationalize depressive symptoms and examines the effects of three social determinants: income perception, social meeting frequency, and childhood household conflicts (European Social Survey, 2022).

library(likert)
df$d20 = as.numeric(df$fltdpr)
df$d21 = as.numeric(df$flteeff)
df$d22 = as.numeric(df$slprl)
df$d23 = as.numeric(df$wrhpp)
df$d24 = as.numeric(df$fltlnl)
df$d25 = as.numeric(df$enjlf)
df$d26 = as.numeric(df$fltsd)
df$d27 = as.numeric(df$cldgng)

df$d23 = 5 - df$d23
df$d25 = 5 - df$d25

df_austria <- df[df$cntry == "Austria", ]
df_austria <- df_austria[, order(names(df_austria))]

Variables related to depression were standardized for consistency (e.g., reversing scales for “happy” and “enjoyed life” items).

library(likert)
library(kableExtra)
vnames = c("fltdpr","flteeff","slprl","wrhpp","fltlnl","enjlf","fltsd","cldgng")
likert_df = df[,vnames]

likert_table = likert(likert_df)$results 
likert_numeric_df = as.data.frame(lapply((df[,vnames]), as.numeric))
likert_table$Mean = unlist(lapply((likert_numeric_df[,vnames]), mean, na.rm=T)) # ... and append new columns to the data frame
likert_table$Count = unlist(lapply((likert_numeric_df[,vnames]), function (x) sum(!is.na(x))))
likert_table$Item = c(
  d20="How much of the time during the past week did you felt depressed?",
  d21="How much of the time during the past week did you felt that everything you did was an effort?",
  d22="How much of the time during the past week did your sleep was restless?",
  d23="How much of the time during the past week did you were happy?",
  d24="How much of the time during the past week did you felt lonely?",
  d25="How much of the time during the past week did you enjoyed life?",
  d26="How much of the time during the past week did you felt sad?",
  d27="How much of the time during the past week did you could not get going?")

likert_table[,2:6] = round(likert_table[,2:6],1)
# round means to 3 decimal digits
likert_table[,7] = round(likert_table[,7],3)

# create formatted table
kable_styling(kable(likert_table,
                    caption = "Distribution of answers regarding depression indicators (ESS round 11, all countries)"
                    )
              )
Distribution of answers regarding depression indicators (ESS round 11, all countries)
Item None or almost none of the time Some of the time Most of the time All or almost all of the time Mean Count
How much of the time during the past week did you felt depressed? 64.9 29.1 4.6 1.5 1.4 39981
How much of the time during the past week did you felt that everything you did was an effort? 48.4 38.4 9.8 3.4 1.7 39983
How much of the time during the past week did your sleep was restless? 43.9 39.9 11.6 4.6 1.8 40017
How much of the time during the past week did you were happy? 4.0 23.5 48.9 23.6 2.9 39890
How much of the time during the past week did you felt lonely? 68.1 24.3 5.3 2.3 1.4 39983
How much of the time during the past week did you enjoyed life? 5.3 24.8 44.8 25.0 2.9 39878
How much of the time during the past week did you felt sad? 52.5 41.1 4.9 1.6 1.6 39981
How much of the time during the past week did you could not get going? 55.7 36.1 6.2 2.0 1.5 39949
# create basic plot (code also valid)
plot(likert(summary=likert_table[,1:6]))

cronbach.alpha(df_austria[,c("d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27")], na.rm=T)
## 
## Cronbach's alpha for the 'df_austria[, c("d20", "d21", "d22", "d23", "d24", "d25", "d26", ' '    "d27")]' data-set
## 
## Items: 8
## Sample units: 2354
## alpha: 0.801

Cronbach’s alpha 0.8009065 was calculated, demonstrating good internal consistency for the depression items. The dataset was subset for Austria, and variables were recoded into numeric formats for analysis.

df_austria$depression = rowSums(df_austria[,c("d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27")]) / 8
summary(df_austria$depression)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.000   1.250   1.500   1.609   1.875   4.000      33
#Min.    1st Qu. Median  Mean    3rd Qu. Max.       NA's 
#1.000   1.250   1.500   1.609   1.875   4.000      33

hist(df_austria$depression, breaks=8)

Hypothesis 1:

Individuals with negative feelings about income levels are more likely to experience higher levels of depression. Levels 1 - 4 - 1: Living comfortable - 2: Coping - 3: Difficult - 4: Very difficult

df_austria$hincfel_num <- as.numeric(factor(df_austria$hincfel, levels = c(
    "Living comfortably on present income", 
    "Coping on present income", 
    "Difficult on present income", 
    "Very difficult on present income")))

t1 = table(df_austria$depression, df_austria$hincfel_num)

model <- lm(depression ~ hincfel_num, data = df_austria)
summary(model)
## 
## Call:
## lm(formula = depression ~ hincfel_num, data = df_austria)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.98512 -0.31000 -0.08478  0.24011  2.36511 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.28466    0.02457   52.29   <2e-16 ***
## hincfel_num  0.17511    0.01244   14.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4257 on 2305 degrees of freedom
##   (47 observations deleted due to missingness)
## Multiple R-squared:  0.07921,    Adjusted R-squared:  0.07881 
## F-statistic: 198.3 on 1 and 2305 DF,  p-value: < 2.2e-16

Results Hypothesis 1:

Statistically significant positive relationship between perceived income difficulty and depression. As people perceive their income situation as more difficult, their depression scores tend to increase. The average depression score is 1.6087355. A linear regression predicting depression from perceived income difficulty yielded an intercept of 1.28 and a slope of 0.175. This indicates that each one-step increase in perceived income difficulty increases depression score by 0.175 units. The model is statistically significant (p < <2e-16) with an R² of 0.079, explaining about 7.9% of the variance in depression.

Hypothesis 2:

Social Meeting: Lower social activities are associated with higher levels of depression.

df_austria$sclmeet_num <- as.numeric(factor(df_austria$sclmeet, levels = c(
"Never",
"Less than once a month",
"Once a month",
"Several times a month",
"Once a week",
"Several times a week"
)))

df_austria$sclmeet_num = 7 - df_austria$sclmeet_num

plot(df_austria$sclmeet_num, df_austria$depression, 
     xlab = "Social Meetings (sclmeet_num)", 
     ylab = "Depression", 
     main = "Scatterplot of Depression vs. Social Meetings")
abline(lm(depression ~ sclmeet_num, data = df_austria), col = "blue")

model2 <- lm(depression ~ sclmeet_num, data = df_austria)
summary(model2)
## 
## Call:
## lm(formula = depression ~ sclmeet_num, data = df_austria)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73724 -0.30439 -0.06653  0.22919  2.44561 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.508672   0.020176  74.777  < 2e-16 ***
## sclmeet_num 0.045713   0.008107   5.639 1.93e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4356 on 2207 degrees of freedom
##   (145 observations deleted due to missingness)
## Multiple R-squared:  0.0142, Adjusted R-squared:  0.01375 
## F-statistic: 31.79 on 1 and 2207 DF,  p-value: 1.933e-08

Results Hypothesis 2:

A linear regression showed that lower frequency of social meetings was significantly associated with higher levels of depression. Specifically, for each unit increase in social isolation (sclmeet_num), the depression score increased by 0.046 units (t = 5.64, p = <0.001). The model explained 1.4% of the variance in depression.

Hypothesis 3

More frequent serious conflicts in the household during childhood are associated with higher levels of depression in adulthood.

df_austria$cnfpplh_num <- as.numeric(factor(df_austria$cnfpplh, levels = c(
  "Always",
"Often",
"Sometimes",
"Hardly ever",
"Never"
)))

df_austria$cnfpplh_num = 6 - df_austria$cnfpplh_num
model3 <- lm(depression ~ cnfpplh_num, data = df_austria)
summary(model3)
## 
## Call:
## lm(formula = depression ~ cnfpplh_num, data = df_austria)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9601 -0.3401 -0.1003  0.2697  2.3947 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.360368   0.020537   66.24   <2e-16 ***
## cnfpplh_num 0.119944   0.008948   13.40   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4283 on 2305 degrees of freedom
##   (47 observations deleted due to missingness)
## Multiple R-squared:  0.07232,    Adjusted R-squared:  0.07192 
## F-statistic: 179.7 on 1 and 2305 DF,  p-value: < 2.2e-16

Results Hypothesis 3:

The linear regression revealed that each unit increase in income difficulty is associated with an average increase in depression score of 0.175, which was statistically significant (p < 2.92^{-43}).

Multivariate Regression Model

model_multi <- lm(depression ~ hincfel_num + sclmeet_num + cnfpplh_num, data = df_austria, weights = anweight)
summary_model <- summary(model_multi) 
summary_model
## 
## Call:
## lm(formula = depression ~ hincfel_num + sclmeet_num + cnfpplh_num, 
##     data = df_austria, weights = anweight)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78990 -0.12519 -0.02411  0.10213  1.71438 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.989609   0.031325   31.59  < 2e-16 ***
## hincfel_num 0.148547   0.012059   12.32  < 2e-16 ***
## sclmeet_num 0.035115   0.007667    4.58 4.91e-06 ***
## cnfpplh_num 0.118660   0.008641   13.73  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2288 on 2180 degrees of freedom
##   (170 observations deleted due to missingness)
## Multiple R-squared:  0.1647, Adjusted R-squared:  0.1635 
## F-statistic: 143.3 on 3 and 2180 DF,  p-value: < 2.2e-16

In the multivariate regression model, depression increased by 0.15 points per unit increase in income difficulty,
0.04 per unit less frequent social meetings,
and 0.12 per unit less trust in people.
The model explained 16.5% of the variance in depression scores.

The residual variance goes from \(\sigma^2_{\rm unw}\approx0.16\) (unweighted) to \(\sigma^2_{w}\approx0.05\) (weighted). Weighting with the ESS analysis weight anweight reduces the residual variance, meaning the weighted model explains more of the variation in depression scores, has much smaller residuals on average, and yields more efficient parameter estimates.

Predictors of clinically Significant Depression

# Check frequency of depression_clinical
df_austria$depression_clinical <- ifelse(df_austria$depression >= 8, 1, 0)

table(df_austria$depression_clinical)
## 
##    0 
## 2321
prop.table(table(df_austria$depression_clinical)) * 100  # relative frequencies (%)
## 
##   0 
## 100
df_austria$gndr = factor(df_austria$gndr, labels = c("Male", "Female"))
df_austria$eduyrs = as.numeric(df_austria$eduyrs)
df_austria$health = as.numeric(df_austria$health)

# Logistic regression — using your predictors from HW4
log_model = glm(depression_clinical ~ hincfel_num + sclmeet_num + cnfpplh_num + gndr + eduyrs + health, data = df_austria, family = binomial(), weights = anweight) 
# Model summary
summary(log_model)
## 
## Call:
## glm(formula = depression_clinical ~ hincfel_num + sclmeet_num + 
##     cnfpplh_num + gndr + eduyrs + health, family = binomial(), 
##     data = df_austria, weights = anweight)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.639e+01  7.405e+04       0        1
## hincfel_num  8.376e-03  1.800e+04       0        1
## sclmeet_num  8.522e-03  1.117e+04       0        1
## cnfpplh_num -5.820e-03  1.270e+04       0        1
## gndrFemale   2.992e-02  2.481e+04       0        1
## eduyrs      -7.143e-03  3.432e+03       0        1
## health       3.217e-02  1.583e+04       0        1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 0.0000e+00  on 2147  degrees of freedom
## Residual deviance: 4.8609e-09  on 2141  degrees of freedom
##   (206 observations deleted due to missingness)
## AIC: 14
## 
## Number of Fisher Scoring iterations: 25
# Odds Ratios
exp(coef(log_model))
##  (Intercept)  hincfel_num  sclmeet_num  cnfpplh_num   gndrFemale       eduyrs 
## 3.465490e-12 1.008411e+00 1.008558e+00 9.941966e-01 1.030374e+00 9.928821e-01 
##       health 
## 1.032695e+00

The logistic regression model predicting clinically significant depression (CES-D-8 ≥ 9) did not converge, likely due to low base rate of clinically significant cases and too many predictors relative to the number of events. As a result, this model could not be interpreted reliably?