setwd(“C:/Users/User/Downloads/ssp saffie”) df =
read.spss(“ESS11.sav”, to.data.frame = T) `{r} ...
###Prevalence and relationship between depression and social health
determinants
H1: The more feeling of depression “fltdpr” is associated with higher levels of depression H1 : Sleeplessness “slprl” is associated with high levels of depression H1 : Sadness “fltsd” is associated to increase of depression levels H1 : Loneliness “fltlnl” is associated to higher depression levels H1 : Happiness “wrhpp” is associated to low levels of depression H1 : Could not get going “cldgng” is related to an increase in depression H1 : The feeling that everything did as effort “flteeff” is associated with an increase in depression.
#We had to first test the chosen variables ie fltlnl, slprl, fltsd, fltsd, wrhpp, cldgng, and flteeff from the database for reliability and the Cronbach’s alpha value was 0.714 which indicates acceptable internal consistency for the CES-D8 depression scale. # The table below shows the descriptive statistics summarizing the distribution of the depression-dependent variable with its interpretation.
library(foreign) #to read SPSS and other formats)
library(ltm)
## Loading required package: MASS
## Loading required package: msm
## Loading required package: polycor
library(kableExtra)
library (ggplot2)
library (broom)
library(likert)
## Loading required package: xtable
library(pseudo)
## Loading required package: KMsurv
## Loading required package: geepack
library(xtable)
library(MASS)
library(msm)
library(polycor)
df = read.spss("C:/Users/User/Downloads/ssp saffie/ESS11.sav", to.data.frame = T)
# knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
Min (Minimum) 7.00 The lowest depression score in the dataset is 7. 1st Quartile (Q1) 9.00 25% of the participants scored ≤ 9 on the depression scale. Median (Q2) 10.00 The middle value in the dataset (50% of participants scored ≤ 10). Mean (Average) 10.78 The average depression score is 10.78. 3rd Quartile (Q3) 12.00 75% of participants scored ≤ 12 on the depression scale. Max (Maximum) 28.00 The highest depression score in the dataset is 28. NA’s (Missing Values) 18 There are 18 missing values in the dataset.
#we decided to subset Finland data
DataFI = subset(df,cntry == "Finland")
#DataFI
#selected explanatory variables/predictor variables
#fltlnl felt lonely, how often past week
#slprl sleep was restless,how often past week
#fltdpr felt depressed,how often past week
#wrhpp felt happy in the past week
#fltsd felt sad how often in the past week
#cldgng couldnot get going how often in the past week
#flteeff felt everything did as effort, how often past week
#enjlf enjoy life how often past week
# Now I am going to elaborate d20-d27 variables to create the CES-D8 depression scale
# First of all I have to reverse "wrhpp" as their indication of wellbeing is exactly reversed from the other variables
# I will do this with "5-" so we get numbers from 1-4
DataFI$wrhpp_num = as.numeric(DataFI$wrhpp)
DataFI$wrhpp_num = 5 - DataFI$wrhpp_num #add varible no eight
DataFI$enjlf_num = as.numeric(DataFI$enjlf)
DataFI$enjlf_num = 5 - DataFI$enjlf_num #add varible no eight
#table(DataFI$wrhpp_num)
# Now I transform the other scales into numeric ones to calculate with it
DataFI$fltdpr_num = as.numeric(DataFI$fltdpr)
DataFI$flteeff_num = as.numeric(DataFI$flteeff)
DataFI$slprl_num = as.numeric(DataFI$slprl)
DataFI$fltlnl_num = as.numeric(DataFI$fltlnl)
DataFI$fltsd_num = as.numeric(DataFI$fltsd)
DataFI$cldgng_num = as.numeric(DataFI$cldgng)
# long version
subset_vars <- DataFI[, c("fltdpr_num", "enjlf_num", "slprl_num", "wrhpp_num", "fltlnl_num", "fltsd_num", "flteeff_num", "cldgng_num")]
likert_means = c()
likert_means$wrhpp_num = mean(DataFI$wrhpp, na.rm=T)
## Warning in mean.default(DataFI$wrhpp, na.rm = T): argument is not numeric or
## logical: returning NA
likert_means$enjlf_num = mean(DataFI$enjlf, na.rm=T)
## Warning in mean.default(DataFI$enjlf, na.rm = T): argument is not numeric or
## logical: returning NA
likert_means$fltdpr_num = mean(DataFI$fltdpr, na.rm=T)
## Warning in mean.default(DataFI$fltdpr, na.rm = T): argument is not numeric or
## logical: returning NA
likert_means$fltnl_num = mean(DataFI$fltnl, na.rm=T)
## Warning in mean.default(DataFI$fltnl, na.rm = T): argument is not numeric or
## logical: returning NA
likert_means$slprl_num = mean(DataFI$slprl, na.rm=T)
## Warning in mean.default(DataFI$slprl, na.rm = T): argument is not numeric or
## logical: returning NA
likert_means$fltsd_num = mean(DataFI$fltsd, na.rm=T)
## Warning in mean.default(DataFI$fltsd, na.rm = T): argument is not numeric or
## logical: returning NA
likert_means$cldgng_num = mean(DataFI$cldgng, na.rm=T)
## Warning in mean.default(DataFI$cldgng, na.rm = T): argument is not numeric or
## logical: returning NA
likert_means$flteeff_num = mean(DataFI$flteeff, na.rm=T)
## Warning in mean.default(DataFI$flteeff, na.rm = T): argument is not numeric or
## logical: returning NA
library(likert)
kable_styling(kable(likert(DataFI[, c("fltdpr", "enjlf", "slprl", "wrhpp", "fltlnl", "fltsd", "flteeff", "cldgng")])$results))
| Item | None or almost none of the time | Some of the time | Most of the time | All or almost all of the time |
|---|---|---|---|---|
| fltdpr | 82.500000 | 15.00000 | 1.730769 | 0.7692308 |
| enjlf | 3.727506 | 21.97943 | 52.120823 | 22.1722365 |
| slprl | 42.233633 | 46.40565 | 7.894737 | 3.4659820 |
| wrhpp | 3.848621 | 23.28416 | 58.370750 | 14.4964721 |
| fltlnl | 78.745199 | 17.15749 | 2.816901 | 1.2804097 |
| fltsd | 69.186419 | 28.63549 | 1.729661 | 0.4484305 |
| flteeff | 59.858703 | 32.56262 | 5.587669 | 1.9910083 |
| cldgng | 50.965251 | 40.92664 | 6.177606 | 1.9305019 |
## print means, out comment if not needed
# Now I can sum the rows and calculate the mean
DataFI$CES_D8 = rowSums(DataFI[, c("fltdpr_num","enjlf_num", "flteeff_num", "slprl_num", "wrhpp_num", "fltlnl_num", "fltsd_num", "cldgng_num")])
##summary(DataFI$CES_D8)
# For the bivariate association, we used correlation analysis and regression analysis between the independent variables and depression (CES-D8).
# The correlation analysis shows that CES-D8 is positively associated with loneliness, sadness, sleeplessness, difficulty getting going, and feelings of effort, while happiness negatively correlates with depression. The p-value of < 2e-16 for all the independent variables shows a significant prediction of depression.
##The multivariate regression model
# The regression analysis demonstrated a strong relationship between the predictor variables and depression (CES-D8 scale). Where the intercept (1.035) represents the baseline depression score when all the independent variables are at zero. All the predictor variables have significant coefficients(p<2e-16) indicating that higher levels of these factors are associated with increase in depression scores. The R-squared value (0.9468) suggests that approximately 94.68% of the variation in depression scores confirming its strong predictive power. The F-statistic (4563, p< 2.2e-16) further reinforces the model’s statistical significance.
## Predictors of Clinically Significant Depression
In this section, we extend our analysis by examining predictors of clinically significant depressive symptoms, using a binary outcome variable derived from the CES-D8 depression scale.
Following Briggs et al. (2018), we use a cut-off score of 9 on the CES-D8 scale to classify participants with clinically significant depression.
# Create binary outcome for clinical depression based on CES-D8 score
DataFI$depression_clinical <- ifelse((DataFI$CES_D8-8) >= 9, 1, 0)
# Frequency distribution
table(DataFI$depression_clinical)
##
## 0 1
## 1366 175
prop.table(table(DataFI$depression_clinical))
##
## 0 1
## 0.8864374 0.1135626
# Fit logistic regression model
DataFI$gndr <- as.factor(DataFI$gndr)
DataFI$agea <- as.numeric(DataFI$agea)
DataFI$eduyrs <- as.numeric(DataFI$eduyrs)
DataFI$emplrel <- as.numeric(DataFI$emplrel)
model_logistic_full <- glm(depression_clinical ~ gndr + agea + eduyrs + emplrel,
data = DataFI, family = binomial)
# View model summary
summary(model_logistic_full)
##
## Call:
## glm(formula = depression_clinical ~ gndr + agea + eduyrs + emplrel,
## family = binomial, data = DataFI)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.256910 0.487249 -2.580 0.00989 **
## gndrFemale 0.301363 0.166969 1.805 0.07109 .
## agea -0.007809 0.004296 -1.818 0.06912 .
## eduyrs -0.034508 0.020830 -1.657 0.09760 .
## emplrel -0.131406 0.226225 -0.581 0.56133
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1060.2 on 1516 degrees of freedom
## Residual deviance: 1051.5 on 1512 degrees of freedom
## (46 observations deleted due to missingness)
## AIC: 1061.5
##
## Number of Fisher Scoring iterations: 5
exp(coef(model_logistic_full))
## (Intercept) gndrFemale agea eduyrs emplrel
## 0.2845319 1.3517003 0.9922216 0.9660806 0.8768620
# Calculate Confidence Intervals for ORs
exp(confint(model_logistic_full))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.1097772 0.7428004
## gndrFemale 0.9760106 1.8799047
## agea 0.9839235 1.0006547
## eduyrs 0.9269038 1.0057911
## emplrel 0.5463544 1.3335323
r_mcfadden = with(summary(model_logistic_full), 1 - deviance/null.deviance)
r_nagelkerke = with(summary(model_logistic_full), r_mcfadden/(1 - (null.deviance / nrow(model_logistic_full$data)*log(2))))
r_nagelkerke
## [1] 0.01552694
#intrepretation : Baseline odds of clinical depression (not very interpretable on its own but included as a model constant).
gndrFemale: Females might have higher odds of clinical depression compared to males, but the lower bound being 0.999 makes this borderline — we should check the p-value to confirm statistical significance.
agea: No significant association with odds of clinical depression.
#the maximum possible value, if you are “very depressed” is 28 #on the other hand the minimum possible value is 7
#H1:the more feeling of depression “fltdpr” is associated with higher levels of depression #H1:enjoy life “enjlfl” is associated with high levels of depression #H1:Sleeplessness “slprl” is associated with high levels of depression #H1:sadness “fltsd” is associated to increase of depression levels #H1: loneliness “fltlnl” is associated to higher depression levels #H1: happiness “wrhpp” is associated to low levels of depression #H1: “cldgng” Higher frequency of feeling unable to get going is associated with increased depression level #H1: “Flteeff” Higher frequency of feeling that everything was an effort is associated with increased depression levels #All of them show positive correlation with the depression symptoms apart from wrhpp which shows a negative correlation #all of these variables have the same scale of 0-10
#interaction of these variables for Finland # Correlation matrix correlation <- cor(subset_vars, use = “complete.obs”) correlation
modelFI = lm(DataFI$CES_D8 ~ fltdpr + enjlf + slprl + wrhpp + fltlnl + fltsd + cldgng + flteeff ,data = DataFI) summary(modelFI) #it seems that the hypotheses stated above are mostly correct,
modelFI <- lm(CES_D8 ~ fltdpr_num + enjlf_num + slprl_num + wrhpp_num + fltlnl_num + fltsd_num + cldgng_num + flteeff_num, data = DataFI)
model = lm(CES_D8 ~ fltdpr_num +enjlf_num + slprl_num + fltsd_num + fltlnl_num + cldgng_num + flteeff_num, data = DataFI) summary(model)
vnames = c(“fltdpr_num”, “enjlf_num”, “slprl_num”, “wrhpp_num”, “fltlnl_num”, “fltsd_num”, “flteeff_num”, “cldgng_num”) likert_df = df[,vnames]
likert(likert_df)
plot(likert(likert_df))
#Append mean -> convert to numeric likert_numeric_df = as.data.frame(lapply((df[,vnames]), as.numeric))
likert_means = lapply((likert_numeric_df[,vnames]), mean, na.rm=T)
likert_table = likert(likert_df)\(results # we save the "inner" data frame of the likert structure ... likert_table\)Mean = unlist(likert_means) # … and append new columns to the data frame likert_table$Count = unlist(likert_counts) likert_table # print extended table, outcomment if not needed
likert_table$Item = c(
d20=“you felt depression?”, d21=“you felt everything you did was an effort?”, d22=“your sleep was restless?”, d23=“you were happy?”, d24=“you felt lonely?”, d25=“you enjoyed life?”, d26=“you felt sad?”, d27=“you could not get going 65?.” )
likert_table[,2:6] = round(likert_table[,2:6],1) # round means to 3 decimal digits likert_table[,7] = round(likert_table[,7],3) # create formatted table kable_styling(kable(likert_table, caption = “Distribution of answers regarding depression scale (ESS round 11, only Finland” ))
plot(likert(summary=likert_table[,1:6])) # limit to columns 1:6 to skip mean and count #Regression model for design model
lm(depression ~ agea +gndr+smkstat+eduyrs+region,data = df_fin,weights = dweight)
Interpretation: Baseline odds of clinical depression (not very interpretable on its own but included as a model constant).
gndrFemale: Females might have higher odds of clinical depression compared to males, but the lower bound being 0.999 makes this borderline — we should check the p-value to confirm statistical significance.
agea: No significant association with odds of clinical depression.
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
``` r
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
Social Health Determinants as Explanatory Variables: