Introduction

This document analyzes depressive symptoms in Austria using data from the European Social Survey (ESS11). The analysis includes descriptive statistics and a regression model of the CES-D8 depression scale, using both unweighted and weighted approaches.

Loading Required Libraries

library(foreign)
library(likert)
## Warning: package 'likert' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: xtable
## Warning: package 'xtable' was built under R version 4.4.3
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.4.3
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

Loading and Preparing the Data

setwd("C:/Users/Vinicius Alpha/Downloads/")

suppressWarnings(df <- read.spss("ESS11.sav", to.data.frame = TRUE))

# Filter for Austria only
df_AT <- subset(df, cntry == "Austria")

# Print sample size
nrow(df_AT)
## [1] 2354

The Austrian sample consists of 2354 respondents.

Depression Scale (CES-D8)

We construct the CES-D8 scale by using eight self-reported depression-related items. Two positively worded items are reverse-coded.

# Convert items to numeric
df_AT$d1 <- as.numeric(df_AT$fltdpr)
df_AT$d2 <- as.numeric(df_AT$flteeff)
df_AT$d3 <- as.numeric(df_AT$slprl)
df_AT$d4 <- as.numeric(df_AT$wrhpp)
df_AT$d5 <- as.numeric(df_AT$fltlnl)
df_AT$d6 <- as.numeric(df_AT$enjlf)
df_AT$d7 <- as.numeric(df_AT$fltsd)
df_AT$d8 <- as.numeric(df_AT$cldgng)

# Reverse score the positive items
df_AT$d4 <- 5 - df_AT$d4
df_AT$d6 <- 5 - df_AT$d6

# Ensure numeric conversion is strict
scale_items <- df_AT[, c("d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8")]
scale_items[] <- lapply(scale_items, function(x) as.numeric(as.character(x)))

# Compute depression score (mean)
df_AT$CES_D8 <- rowMeans(scale_items, na.rm = TRUE)

Likert Table and Plot

Below we describe the eight items included in the CES-D8 depression scale:

  1. fltdprHow much of the time during the past week did you feel depressed?
  2. flteeff… did you feel that everything you did was an effort?
  3. slprl… was your sleep restless?
  4. wrhpp… did you feel happy? (positive, reverse-coded)
  5. fltlnl… did you feel lonely?
  6. enjlf… did you enjoy life? (positive, reverse-coded)
  7. fltsd… did you feel sad?
  8. cldgng… did you feel you could not get going?

These items were rated on a Likert scale and are used to assess the presence and intensity of depressive symptoms experienced in the past week.

We now present the individual items as a table and visualize the results using the likert package.

likert_items <- df_AT[, c("fltdpr", "flteeff", "slprl", "wrhpp", "fltlnl", "enjlf", "fltsd", "cldgng")]

# Convert all to ordered factors 
likert_items[] <- lapply(likert_items, function(x) factor(as.character(x), ordered = TRUE))

# Create Likert object
likert_obj <- likert(likert_items)

# Table of results
summary_likert <- likert_obj$results
kable(summary_likert, caption = "Distribution of Depression Scale Responses") %>% kable_styling()
Distribution of Depression Scale Responses
Item All or almost all of the time Most of the time None or almost none of the time Some of the time
fltdpr 0.8510638 2.340425 68.170213 28.63830
flteeff 2.0442930 7.112436 52.129472 38.71380
slprl 3.1116795 7.630008 50.554135 38.70418
wrhpp 21.8990590 47.048760 3.592814 27.45937
fltlnl 1.3617021 3.276596 77.361702 18.00000
enjlf 22.0795892 41.420625 4.321780 32.17801
fltsd 1.3219616 2.089552 68.656716 27.93177
cldgng 1.2782275 2.939923 69.237324 26.54452
# Plot
plot(likert_obj)

Regression Analysis: CES-D8 Predictors

We now assess potential predictors of depressive symptoms using a linear regression model. The predictors include:

We test both unweighted and weighted models.

# Transform to numeric
df_AT$fltlnl <- as.numeric(as.factor(df_AT$fltlnl))
df_AT$sclmeet <- as.numeric(as.factor(df_AT$sclmeet))
df_AT$health <- as.numeric(as.factor(df_AT$health))
df_AT$dosprt <- as.numeric(as.factor(df_AT$dosprt))

# Replace function
replace_with_mode <- function(x) {
  x <- as.character(x)
  non_na <- x[!is.na(x)]
  if (length(non_na) == 0) return(x)
  mode_value <- names(sort(table(non_na), decreasing = TRUE))[1]
  x[is.na(x)] <- mode_value
  return(as.numeric(as.character(x)))
}

# Apply to predictors
df_AT$fltlnl <- replace_with_mode(df_AT$fltlnl)
df_AT$sclmeet <- replace_with_mode(df_AT$sclmeet)
df_AT$health <- replace_with_mode(df_AT$health)
df_AT$dosprt <- replace_with_mode(df_AT$dosprt)

# Checking again if missing values are fixed
colSums(is.na(df_AT[, c("fltlnl", "sclmeet", "health", "dosprt")]))
##  fltlnl sclmeet  health  dosprt 
##       0       0       0       0
summary(df_AT$fltlnl)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   1.000   1.286   1.000   4.000
summary(df_AT$sclmeet)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.000   5.000   4.892   6.000   7.000
summary(df_AT$health)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   2.064   3.000   5.000
### Unweighted Model
# Subset complete cases for model variables
subset_vars = df_AT[, c("fltlnl", "sclmeet", "health", "dosprt")]
correlation = cor(subset_vars, use = "complete.obs")
print(correlation)
##              fltlnl     sclmeet     health      dosprt
## fltlnl   1.00000000 -0.04745841  0.2541255 -0.09371122
## sclmeet -0.04745841  1.00000000 -0.1943385  0.09867391
## health   0.25412550 -0.19433852  1.0000000 -0.27171649
## dosprt  -0.09371122  0.09867391 -0.2717165  1.00000000
# Fit model
model_unweighted <- lm(CES_D8 ~ fltlnl + sclmeet + health + dosprt, data = df_AT)
summary(model_unweighted)
## 
## Call:
## lm(formula = CES_D8 ~ fltlnl + sclmeet + health + dosprt, data = df_AT)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.00555 -0.23609 -0.05078  0.19148  1.67036 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.795920   0.039992  19.902   <2e-16 ***
## fltlnl       0.381485   0.011619  32.832   <2e-16 ***
## sclmeet     -0.004519   0.005597  -0.807    0.419    
## health       0.177576   0.008319  21.346   <2e-16 ***
## dosprt      -0.003870   0.002861  -1.353    0.176    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3229 on 2346 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.4759, Adjusted R-squared:  0.475 
## F-statistic: 532.5 on 4 and 2346 DF,  p-value: < 2.2e-16

Weighted Model (using dweight)

# Fit weighted model
model_weighted <- lm(CES_D8 ~ fltlnl + sclmeet + health + dosprt, data = df_AT, weights = dweight)
summary(model_weighted)
## 
## Call:
## lm(formula = CES_D8 ~ fltlnl + sclmeet + health + dosprt, data = df_AT, 
##     weights = dweight)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.66127 -0.21712 -0.03533  0.17408  2.35889 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.773847   0.039772  19.457   <2e-16 ***
## fltlnl       0.388493   0.012009  32.350   <2e-16 ***
## sclmeet     -0.002070   0.005538  -0.374   0.7086    
## health       0.180318   0.008363  21.561   <2e-16 ***
## dosprt      -0.005710   0.002828  -2.019   0.0436 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3171 on 2346 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.463,  Adjusted R-squared:  0.462 
## F-statistic: 505.6 on 4 and 2346 DF,  p-value: < 2.2e-16

Model Comparison

cat("R-squared unweighted: ", round(summary(model_unweighted)$r.squared, 3), "\n")
## R-squared unweighted:  0.476
cat("R-squared weighted: ", round(summary(model_weighted)$r.squared, 3), "\n")
## R-squared weighted:  0.463

The weighted model shows a slightly different R-squared, indicating the influence of survey weights on model performance. Compare coefficients for further interpretation.