library(foreign)
#install.packages("ltm") # required to calculate Cronbach's alpha
library(ltm)

## Loading required package: MASS

## Loading required package: msm

## Loading required package: polycor

# Point the session at the folder that holds the ESS11 export, then import the
# SPSS file as a data frame.
# NOTE(review): the absolute path ties the script to one machine; a
# project-relative path would be more portable.
setwd("/Users/leonieanagnostopoulos/Downloads")
df <- read.spss(file = "ESS11.sav", to.data.frame = TRUE)
library(kableExtra) # create formatted tables

Statistical Analysis of ESS 11 Data

Introduction

Depression is a significant public health issue with far-reaching consequences, affecting individuals’ emotional and physical well-being. Understanding the social determinants of depression is essential for developing effective interventions. This study utilizes the CES-D8 Depression Scale, a concise and reliable measure, to operationalize depression as the dependent variable and explores its relationship with various social factors. A key focus of this analysis is physical activity, measured as the number of days in the past week an individual engaged in sports or other physical activities. Physical activity is widely recognized for its mental health benefits, yet its interaction with other social determinants remains underexplored. Additionally, this study examines alcohol consumption as a potential risk factor, hypothesizing that higher alcohol intake is associated with increased depressive symptoms. By analyzing data from the European Social Survey (ESS) 11, this study investigates how variables such as socioeconomic status, social support, and lifestyle behaviors predict depressive symptoms. Focusing on Iceland, the analysis employs quantitative methods to assess the predictive power of these determinants. The findings aim to provide empirical evidence for targeted interventions and inform public health strategies. By highlighting actionable pathways, this research contributes to the broader understanding of mental health determinants and underscores the importance of addressing social factors in combating depression.

# Country of interest: Iceland.
# Frequency distribution of the ordinal item fltdpr (four response categories,
# from "None or almost none of the time" to "All or almost all of the time"),
# computed for the Icelandic respondents only. The data set itself is not
# subset yet at this point.
table(with(df, fltdpr[cntry == "Iceland"]))

## 
## None or almost none of the time                Some of the time 
##                             510                             276 
##                Most of the time   All or almost all of the time 
##                              36                              15

# From here on, work with the Icelandic respondents only
df <- df[df[["cntry"]] == "Iceland", , drop = FALSE]

# List every column of the reduced data set
colnames(df)

##   [1] "name"      "essround"  "edition"   "proddate"  "idno"      "cntry"    
##   [7] "dweight"   "pspwght"   "pweight"   "anweight"  "nwspol"    "netusoft" 
##  [13] "netustm"   "ppltrst"   "pplfair"   "pplhlp"    "polintr"   "psppsgva" 
##  [19] "actrolga"  "psppipla"  "cptppola"  "trstprl"   "trstlgl"   "trstplc"  
##  [25] "trstplt"   "trstprt"   "trstep"    "trstun"    "vote"      "prtvtdat" 
##  [31] "prtvtebe"  "prtvtchr"  "prtvtccy"  "prtvtffi"  "prtvtffr"  "prtvgde1" 
##  [37] "prtvgde2"  "prtvtegr"  "prtvthhu"  "prtvteis"  "prtvteie"  "prtvteit" 
##  [43] "prtvclt1"  "prtvclt2"  "prtvclt3"  "prtvtinl"  "prtvtcno"  "prtvtfpl" 
##  [49] "prtvtept"  "prtvtbrs"  "prtvtesk"  "prtvtgsi"  "prtvtges"  "prtvtdse" 
##  [55] "prtvthch"  "prtvtdgb"  "contplt"   "donprty"   "badge"     "sgnptit"  
##  [61] "pbldmna"   "bctprd"    "pstplonl"  "volunfp"   "clsprty"   "prtcleat" 
##  [67] "prtclebe"  "prtclbhr"  "prtclccy"  "prtclgfi"  "prtclgfr"  "prtclgde" 
##  [73] "prtclegr"  "prtclihu"  "prtcleis"  "prtclfie"  "prtclfit"  "prtclclt" 
##  [79] "prtclhnl"  "prtclcno"  "prtcljpl"  "prtclgpt"  "prtclbrs"  "prtclesk" 
##  [85] "prtclgsi"  "prtclhes"  "prtcldse"  "prtclhch"  "prtcldgb"  "prtdgcl"  
##  [91] "lrscale"   "stflife"   "stfeco"    "stfgov"    "stfdem"    "stfedu"   
##  [97] "stfhlth"   "gincdif"   "freehms"   "hmsfmlsh"  "hmsacld"   "euftf"    
## [103] "lrnobed"   "loylead"   "imsmetn"   "imdfetn"   "impcntr"   "imbgeco"  
## [109] "imueclt"   "imwbcnt"   "happy"     "sclmeet"   "inprdsc"   "sclact"   
## [115] "crmvct"    "aesfdrk"   "health"    "hlthhmp"   "atchctr"   "atcherp"  
## [121] "rlgblg"    "rlgdnm"    "rlgdnbat"  "rlgdnacy"  "rlgdnafi"  "rlgdnade" 
## [127] "rlgdnagr"  "rlgdnhu"   "rlgdnais"  "rlgdnie"   "rlgdnlt"   "rlgdnanl" 
## [133] "rlgdnno"   "rlgdnapl"  "rlgdnapt"  "rlgdnrs"   "rlgdnask"  "rlgdnase" 
## [139] "rlgdnach"  "rlgdngb"   "rlgblge"   "rlgdnme"   "rlgdebat"  "rlgdeacy" 
## [145] "rlgdeafi"  "rlgdeade"  "rlgdeagr"  "rlgdehu"   "rlgdeais"  "rlgdeie"  
## [151] "rlgdelt"   "rlgdeanl"  "rlgdeno"   "rlgdeapl"  "rlgdeapt"  "rlgders"  
## [157] "rlgdeask"  "rlgdease"  "rlgdeach"  "rlgdegb"   "rlgdgr"    "rlgatnd"  
## [163] "pray"      "dscrgrp"   "dscrrce"   "dscrntn"   "dscrrlg"   "dscrlng"  
## [169] "dscretn"   "dscrage"   "dscrgnd"   "dscrsex"   "dscrdsb"   "dscroth"  
## [175] "dscrdk"    "dscrref"   "dscrnap"   "dscrna"    "ctzcntr"   "brncntr"  
## [181] "cntbrthd"  "livecnta"  "lnghom1"   "lnghom2"   "feethngr"  "facntr"   
## [187] "fbrncntc"  "mocntr"    "mbrncntc"  "ccnthum"   "ccrdprs"   "wrclmch"  
## [193] "admrclc"   "testjc34"  "testjc35"  "testjc36"  "testjc37"  "testjc38" 
## [199] "testjc39"  "testjc40"  "testjc41"  "testjc42"  "vteurmmb"  "vteubcmb" 
## [205] "ctrlife"   "etfruit"   "eatveg"    "dosprt"    "cgtsmok"   "alcfreq"  
## [211] "alcwkdy"   "alcwknd"   "icgndra"   "alcbnge"   "height"    "weighta"  
## [217] "dshltgp"   "dshltms"   "dshltnt"   "dshltref"  "dshltdk"   "dshltna"  
## [223] "medtrun"   "medtrnp"   "medtrnt"   "medtroc"   "medtrnl"   "medtrwl"  
## [229] "medtrnaa"  "medtroth"  "medtrnap"  "medtrref"  "medtrdk"   "medtrna"  
## [235] "medtrnu"   "hlpfmly"   "hlpfmhr"   "trhltacu"  "trhltacp"  "trhltcm"  
## [241] "trhltch"   "trhltos"   "trhltho"   "trhltht"   "trhlthy"   "trhltmt"  
## [247] "trhltpt"   "trhltre"   "trhltsh"   "trhltnt"   "trhltref"  "trhltdk"  
## [253] "trhltna"   "fltdpr"    "flteeff"   "slprl"     "wrhpp"     "fltlnl"   
## [259] "enjlf"     "fltsd"     "cldgng"    "hltprhc"   "hltprhb"   "hltprbp"  
## [265] "hltpral"   "hltprbn"   "hltprpa"   "hltprpf"   "hltprsd"   "hltprsc"  
## [271] "hltprsh"   "hltprdi"   "hltprnt"   "hltprref"  "hltprdk"   "hltprna"  
## [277] "hltphhc"   "hltphhb"   "hltphbp"   "hltphal"   "hltphbn"   "hltphpa"  
## [283] "hltphpf"   "hltphsd"   "hltphsc"   "hltphsh"   "hltphdi"   "hltphnt"  
## [289] "hltphnap"  "hltphref"  "hltphdk"   "hltphna"   "hltprca"   "cancfre"  
## [295] "cnfpplh"   "fnsdfml"   "jbexpvi"   "jbexpti"   "jbexpml"   "jbexpmc"  
## [301] "jbexpnt"   "jbexpnap"  "jbexpref"  "jbexpdk"   "jbexpna"   "jbexevl"  
## [307] "jbexevh"   "jbexevc"   "jbexera"   "jbexecp"   "jbexebs"   "jbexent"  
## [313] "jbexenap"  "jbexeref"  "jbexedk"   "jbexena"   "nobingnd"  "likrisk"  
## [319] "liklead"   "sothnds"   "actcomp"   "mascfel"   "femifel"   "impbemw"  
## [325] "trmedmw"   "trwrkmw"   "trplcmw"   "trmdcnt"   "trwkcnt"   "trplcnt"  
## [331] "eqwrkbg"   "eqpolbg"   "eqmgmbg"   "eqpaybg"   "eqparep"   "eqparlv"  
## [337] "freinsw"   "fineqpy"   "wsekpwr"   "weasoff"   "wlespdm"   "wexashr"  
## [343] "wprtbym"   "wbrgwrm"   "hhmmb"     "gndr"      "gndr2"     "gndr3"    
## [349] "gndr4"     "gndr5"     "gndr6"     "gndr7"     "gndr8"     "gndr9"    
## [355] "gndr10"    "gndr11"    "gndr12"    "yrbrn"     "agea"      "yrbrn2"   
## [361] "yrbrn3"    "yrbrn4"    "yrbrn5"    "yrbrn6"    "yrbrn7"    "yrbrn8"   
## [367] "yrbrn9"    "yrbrn10"   "yrbrn11"   "yrbrn12"   "rshipa2"   "rshipa3"  
## [373] "rshipa4"   "rshipa5"   "rshipa6"   "rshipa7"   "rshipa8"   "rshipa9"  
## [379] "rshipa10"  "rshipa11"  "rshipa12"  "rshpsts"   "rshpsgb"   "lvgptnea" 
## [385] "dvrcdeva"  "marsts"    "marstgb"   "maritalb"  "chldhhe"   "domicil"  
## [391] "paccmoro"  "paccdwlr"  "pacclift"  "paccnbsh"  "paccocrw"  "paccxhoc" 
## [397] "paccnois"  "paccinro"  "paccnt"    "paccref"   "paccdk"    "paccna"   
## [403] "edulvlb"   "eisced"    "edlveat"   "edlvebe"   "edlvehr"   "edlvgcy"  
## [409] "edlvdfi"   "edlvdfr"   "edudde1"   "educde2"   "edlvegr"   "edlvdahu" 
## [415] "edlvdis"   "edlvdie"   "edlvfit"   "edlvdlt"   "edlvenl"   "edlveno"  
## [421] "edlvipl"   "edlvept"   "edlvdrs"   "edlvdsk"   "edlvesi"   "edlvies"  
## [427] "edlvdse"   "edlvdch"   "educgb1"   "edubgb2"   "edagegb"   "eduyrs"   
## [433] "pdwrk"     "edctn"     "uempla"    "uempli"    "dsbld"     "rtrd"     
## [439] "cmsrv"     "hswrk"     "dngoth"    "dngref"    "dngdk"     "dngna"    
## [445] "mainact"   "mnactic"   "crpdwk"    "pdjobev"   "pdjobyr"   "emplrel"  
## [451] "emplno"    "wrkctra"   "estsz"     "jbspv"     "njbspv"    "wkdcorga" 
## [457] "iorgact"   "wkhct"     "wkhtot"    "nacer2"    "tporgwk"   "isco08"   
## [463] "wrkac6m"   "uemp3m"    "uemp12m"   "uemp5yr"   "mbtru"     "hincsrca" 
## [469] "hinctnta"  "hincfel"   "edulvlpb"  "eiscedp"   "edlvpfat"  "edlvpebe" 
## [475] "edlvpehr"  "edlvpgcy"  "edlvpdfi"  "edlvpdfr"  "edupdde1"  "edupcde2" 
## [481] "edlvpegr"  "edlvpdahu" "edlvpdis"  "edlvpdie"  "edlvpfit"  "edlvpdlt" 
## [487] "edlvpenl"  "edlvpeno"  "edlvphpl"  "edlvpept"  "edlvpdrs"  "edlvpdsk" 
## [493] "edlvpesi"  "edlvphes"  "edlvpdse"  "edlvpdch"  "edupcgb1"  "edupbgb2" 
## [499] "edagepgb"  "pdwrkp"    "edctnp"    "uemplap"   "uemplip"   "dsbldp"   
## [505] "rtrdp"     "cmsrvp"    "hswrkp"    "dngothp"   "dngdkp"    "dngnapp"  
## [511] "dngrefp"   "dngnap"    "mnactp"    "crpdwkp"   "isco08p"   "emprelp"  
## [517] "wkhtotp"   "edulvlfb"  "eiscedf"   "edlvfeat"  "edlvfebe"  "edlvfehr" 
## [523] "edlvfgcy"  "edlvfdfi"  "edlvfdfr"  "edufcde1"  "edufbde2"  "edlvfegr" 
## [529] "edlvfdahu" "edlvfdis"  "edlvfdie"  "edlvffit"  "edlvfdlt"  "edlvfenl" 
## [535] "edlvfeno"  "edlvfgpl"  "edlvfept"  "edlvfdrs"  "edlvfdsk"  "edlvfesi" 
## [541] "edlvfges"  "edlvfdse"  "edlvfdch"  "edufcgb1"  "edufbgb2"  "edagefgb" 
## [547] "emprf14"   "occf14b"   "edulvlmb"  "eiscedm"   "edlvmeat"  "edlvmebe" 
## [553] "edlvmehr"  "edlvmgcy"  "edlvmdfi"  "edlvmdfr"  "edumcde1"  "edumbde2" 
## [559] "edlvmegr"  "edlvmdahu" "edlvmdis"  "edlvmdie"  "edlvmfit"  "edlvmdlt" 
## [565] "edlvmenl"  "edlvmeno"  "edlvmgpl"  "edlvmept"  "edlvmdrs"  "edlvmdsk" 
## [571] "edlvmesi"  "edlvmges"  "edlvmdse"  "edlvmdch"  "edumcgb1"  "edumbgb2" 
## [577] "edagemgb"  "emprm14"   "occm14b"   "atncrse"   "anctrya1"  "anctrya2" 
## [583] "regunit"   "region"    "ipcrtiva"  "impricha"  "ipeqopta"  "ipshabta" 
## [589] "impsafea"  "impdiffa"  "ipfrulea"  "ipudrsta"  "ipmodsta"  "ipgdtima" 
## [595] "impfreea"  "iphlppla"  "ipsucesa"  "ipstrgva"  "ipadvnta"  "ipbhprpa" 
## [601] "iprspota"  "iplylfra"  "impenva"   "imptrada"  "impfuna"   "testji1"  
## [607] "testji2"   "testji3"   "testji4"   "testji5"   "testji6"   "testji7"  
## [613] "testji8"   "testji9"   "respc19a"  "symtc19"   "symtnc19"  "vacc19"   
## [619] "recon"     "inwds"     "ainws"     "ainwe"     "binwe"     "cinwe"    
## [625] "dinwe"     "einwe"     "finwe"     "hinwe"     "iinwe"     "kinwe"    
## [631] "rinwe"     "inwde"     "jinws"     "jinwe"     "inwtm"     "mode"     
## [637] "domain"    "prob"      "stratum"   "psu"

# NOTE(review): depres is only created further down (mean of the eight
# depression items), so at this point the column does not exist and summary()
# describes NULL, as the recorded output below shows.
summary(df$depres)

## Length  Class   Mode 
##      0   NULL   NULL

Literature

Sport is widely recognized as one of the most effective forms of physical activity, offering significant benefits for mental health. Participation in organized sports is associated with higher levels of psychological and social well-being, as it fosters social connections, enhances self-esteem, and increases overall life satisfaction. These factors collectively contribute to a reduction in stress, anxiety, and depression. Moreover, engagement in moderate-to-vigorous physical activity has been shown to lower the risk of mental health issues among both children and adults compared to those who remain physically inactive (Tahira, 2023). Beyond its psychological benefits, physical activity may exert an antidepressant effect through physiological mechanisms. Regular exercise has been linked to an increase in interleukin-10, an anti-inflammatory marker, while simultaneously reducing interleukin-6, a pro-inflammatory marker, and serum thiobarbituric acid-reactive substances (TBARS), an indicator of oxidative stress. Additionally, participation in group sports facilitates social interaction, strengthening social bonds and further mitigating depressive symptoms. The relationship between physical activity and depression management is also reflected in clinical recommendations. According to the National Institute for Health and Care Excellence (NICE) guidelines, engaging in group-based sports for 45–60 minutes, up to three times per week over a period of 10–14 weeks, is advised as an effective strategy for reducing symptoms of depression. Various forms of exercise—including aerobic (cardiorespiratory), anaerobic (high-intensity, short-duration activities), and flexibility-based exercises (such as stretching)—have demonstrated positive effects on depression. Furthermore, research highlights a significant correlation between professional exercise interventions and improved outcomes in depression treatment (Machaczek et al., 2021). 
Alcohol consumption and depressive disorders are interconnected through both genetic predisposition and environmental influences, contributing to the development and progression of both conditions. Furthermore, these two issues tend to reinforce one another, creating a cyclical pattern that exacerbates their severity (Kuria et al., 2012b). Research indicates that individuals struggling with alcohol use disorders are twice as likely to experience depression compared to those without such issues. Studies conducted among veterans with alcohol dependency suggest that reducing alcohol intake has a beneficial effect on depressive symptoms. This finding underscores the complex interaction between alcohol consumption and mental health, highlighting the potential for symptom improvement through behavioral changes. Clinical trials involving antidepressants have further demonstrated that individuals suffering from both depression and alcohol use disorders report a reduction in depressive symptoms even when receiving a placebo. This suggests that a decrease in alcohol consumption itself contributes to mood improvement. Additionally, patients who participated in counseling sessions aimed at reducing alcohol intake exhibited notable enhancements in both their alcohol-related behaviors and overall mental well-being. These findings reinforce the detrimental impact of alcohol on mental health and emphasize the importance of limiting alcohol consumption as a critical component of effective depression treatment (Nunes, 2023).

Methods

This study is based on the large ESS11 dataset, focusing on respondents in Iceland. Emotional well-being is assessed using 8 variables:

- depression (fltdpr)
- perceived effort (flteeff)
- restless sleep (slprl)
- happiness (wrhpp)
- loneliness (fltlnl)
- joie de vivre (enjlf)
- sadness (fltsd)
- starting difficulties (cldgng)

For a continuous analysis, these variables were numerically transformed, whereby the scales for happiness (d23) and joie de vivre (d25) had to be reversed to align them with the direction of the other variables. To establish internal consistency between the emotional variables (fltdpr, flteeff, slprl, wrhpp, fltlnl, enjlf, fltsd, cldgng), Cronbach’s alpha was calculated, and the depression score was formed as the mean of the eight items. The dependent variable depression was analyzed using descriptive statistics, including the mean, median, quartiles and histogram. To establish a relationship between the dependent variable depression and the comparison variables, pairwise correlation coefficients were calculated and evaluated using Pearson’s correlation. Two hypotheses were tested regarding the relationship between the depression score and sports participation on the one hand and the depression score and the frequency of alcohol consumption on the other. An analysis of variance (ANOVA) was carried out to test the hypotheses. To assess the relationship between alcohol consumption and depression, a Pearson’s correlation was used for the stand-alone (bivariate) relationship, and a weighted multiple linear regression with sports participation, years of education and household income as additional predictors was used for the multivariate framework.

Depression Scale Calculation and Reliability

# Numeric copies (d20-d27) of the eight emotional items, appended to df in the
# same order as the source items
df[c("d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27")] <- unname(lapply(
  df[c(
    "fltdpr", "flteeff", "slprl", "wrhpp",
    "fltlnl", "enjlf", "fltsd", "cldgng"
  )],
  as.numeric
))

# d23 (wrhpp) and d25 (enjlf) are poled in the opposite direction to the other
# six items, so their codes are flipped (x -> 5 - x) to align all eight
df[c("d23", "d25")] <- lapply(df[c("d23", "d25")], function(code) 5 - code)


# Calculate Cronbach's alpha to check the internal consistency ("reliability")
# of the eight depression items d20-d27 (d23 and d25 already reversed above).
# The data argument is written inline on purpose: the printed header echoes
# this exact expression (see the recorded output below).
# NOTE(review): prefer na.rm = TRUE; T is an ordinary variable that can be
# reassigned.
cronbach.alpha(df[,c("d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27")], na.rm=T)

## 
## Cronbach's alpha for the 'df[, c("d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27")]' data-set
## 
## Items: 8
## Sample units: 842
## alpha: 0.82

# Depression score: row-wise mean of the eight items d20-d27. Missing values
# are not removed, so a respondent with a missing answer on any item gets NA.
df$depres <- rowMeans(
  df[, c("d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27")]
)

Results

Bivariate Associations

1st Hypothesis: The more sports people do, the less depressed they are. An analysis of the Pearson correlation between the depression score and the frequency of sporting activity revealed the following: r = -0.188, t = -5.51, df = 826, p < 0.001, 95% confidence interval [-0.253, -0.122]. Hypothesis number one is that the more sport people do, the less depressed they are. Running a correlation analysis between the dependent variable depression and the long-scaled variable doing sport revealed a negative correlation. The correlation coefficient of r = -0.188 demonstrates that a higher level of physical activity is associated with a lower level of depression. Even though the correlation is not very strong, it is statistically significant. Furthermore, the confidence interval excludes zero, which confirms that this effect is probably not due to chance.

# Sports frequency as a numeric vector
df[["dosprt"]] <- as.numeric(df[["dosprt"]])

# Pearson correlation between depression score and sports frequency, with
# significance test and 95% confidence interval. The two arguments are kept as
# df$... expressions because the "data:" line of the printout echoes them.
cor.test(df$depres, df$dosprt)

## 
##  Pearson's product-moment correlation
## 
## data:  df$depres and df$dosprt
## t = -5.5077, df = 826, p-value = 4.858e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2530975 -0.1216399
## sample estimates:
##        cor 
## -0.1882115

# Overwrite the three remaining predictors with their numeric versions
df[c("alcfreq", "eduyrs", "hinctnta")] <- lapply(
  df[c("alcfreq", "eduyrs", "hinctnta")],
  as.numeric
)

# Linear regression of the depression score on sports frequency, alcohol
# frequency, years of education and household income, weighted by pspwght
model <- lm(
  depres ~ dosprt + alcfreq + eduyrs + hinctnta,
  data = df,
  weights = pspwght
)
summary(model)

## 
## Call:
## lm(formula = depres ~ dosprt + alcfreq + eduyrs + hinctnta, data = df, 
##     weights = pspwght)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4674 -0.3329 -0.1367  0.1279  2.8002 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.0672170  0.1128196  18.323  < 2e-16 ***
## dosprt      -0.0298171  0.0079665  -3.743 0.000196 ***
## alcfreq      0.0176278  0.0109129   1.615 0.106661    
## eduyrs       0.0009354  0.0049878   0.188 0.851286    
## hinctnta    -0.0507387  0.0071362  -7.110  2.7e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5031 on 753 degrees of freedom
##   (84 observations deleted due to missingness)
## Multiple R-squared:  0.09242,    Adjusted R-squared:  0.0876 
## F-statistic: 19.17 on 4 and 753 DF,  p-value: 4.984e-15

# Descriptives of the depression score: minimum, quartiles, mean, maximum and
# the number of missing values
summary(df[["depres"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.000   1.250   1.500   1.630   1.875   3.875      11

# Histogram of the depression score (the call is left as df$depres because the
# default plot title and axis label are derived from that expression)
hist(df$depres)

# Number of respondents at each distinct score value
table(df[["depres"]])

## 
##     1 1.125  1.25 1.375   1.5 1.625  1.75 1.875     2 2.125  2.25 2.375   2.5 
##    56    84    99    92   116    77    70    57    42    36    31    12    12 
## 2.625  2.75 2.875     3 3.125  3.25 3.375   3.5 3.625  3.75 3.875 
##    13    15     3     3     6     1     2     1     1     1     1

# Hypothesis 1: the more sport people do, the less depressed they are.
# Checked through the correlation coefficient.

# Correlation matrix of depression score and sports frequency, restricted to
# respondents with both values present
df[["dosprt"]] <- as.numeric(df[["dosprt"]])
cor(df[c("depres", "dosprt")], use = "complete.obs")

##            depres     dosprt
## depres  1.0000000 -0.1882115
## dosprt -0.1882115  1.0000000

# ((cor(df$depres, df$dosprt, )))
# Significance test and 95% confidence interval for the same correlation; this
# repeats the cor.test() call already run earlier for Hypothesis 1 and yields
# the same result.
cor.test(df$depres, df$dosprt)

## 
##  Pearson's product-moment correlation
## 
## data:  df$depres and df$dosprt
## t = -5.5077, df = 826, p-value = 4.858e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2530975 -0.1216399
## sample estimates:
##        cor 
## -0.1882115

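2nd Hypothesis: The more alcohol people consume, the more depressed they are. r = 0.08, p = 0.0163. Analyses were conducted to test the hypothesis that high alcohol consumption is associated with a higher risk of depression. In this case, a correlation coefficient of r = 0.08 was found. Performing a correlation analysis between the dependent variable depression and the long-scale variable alcohol consumption revealed a slightly positive correlation. The sign has to be read against the coding of alcfreq, however: if, as the group means reported below suggest, 1 denotes daily drinking and higher codes denote less frequent drinking, a positive coefficient means that depression scores rise slightly as drinking becomes less frequent, which does not support the direction of the hypothesis. Given this very low, almost negligible correlation in this data set, one can assume that other factors have a stronger influence on depression than alcohol.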

One-way ANOVA Results: F = 5.79, p = 0.0163. A more in-depth analysis had to be applied to compare the mean depression scores across the different alcohol consumption categories. Here, an ANOVA analysis technique was used. The reported F-statistic has a single degree of freedom for alcohol frequency (see the ANOVA table below), i.e. it tests a linear trend across the numeric frequency codes; the comparison of the seven group means is provided by the Welch one-way test reported further below (F = 3.89, p = 0.002). This resulted in a significant difference being found between the groups. Depression scores were highest among daily drinkers, with a mean score of 2.09, whereas the respondents who consume alcohol in moderation have a lower value of 1.5. However, an unexpected twist here is that those who never consume alcohol have a slightly higher risk of depression than moderate drinkers.

# Numeric copy of the alcohol-frequency variable, followed by the number of
# respondents per code
df[["alcfreq_n"]] <- as.numeric(df[["alcfreq"]])
table(df[["alcfreq_n"]])

## 
##   1   2   3   4   5   6   7 
##   7 103 128 167  93 166 177

# Correlation matrix of depression score and alcohol frequency, restricted to
# respondents with both values present
df[["alcfreq"]] <- as.numeric(df[["alcfreq"]])
cor(df[c("depres", "alcfreq")], use = "complete.obs")

##             depres    alcfreq
## depres  1.00000000 0.08328278
## alcfreq 0.08328278 1.00000000

# ((cor(df$depres, df$alcfreq, )))


# Keep only respondents with both an alcohol-frequency code and a depression
# score
anova_data <- df[!is.na(df$alcfreq) & !is.na(df$depres), ]

# One-way ANOVA of the depression score across the alcohol-frequency
# categories. alcfreq_n is numeric, so it must be wrapped in factor(): entered
# as a number, aov() fits a single slope (1 Df, a linear trend) instead of
# comparing the group means (number of categories minus one Df).
# NOTE(review): the recorded output below was produced with the numeric
# predictor and has to be regenerated.
one.way <- aov(depres ~ factor(alcfreq_n), data = anova_data)

summary(one.way)

##              Df Sum Sq Mean Sq F value Pr(>F)  
## alcfreq_n     1   1.34   1.343    5.79 0.0163 *
## Residuals   829 192.31   0.232                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Simple linear regression of the depression score on the numeric
# alcohol-frequency code; printing the fit shows the call, the intercept and
# the slope.
lm(depres ~ alcfreq_n, data = df)

## 
## Call:
## lm(formula = depres ~ alcfreq_n, data = df)
## 
## Coefficients:
## (Intercept)    alcfreq_n  
##     1.52052      0.02323

# show group means of depres by alcfreq: one mean per alcohol-frequency code,
# with missing depression scores dropped through na.rm
# NOTE(review): prefer na.rm = TRUE; T can be reassigned.
by(df$depres, df$alcfreq, mean, na.rm=T)

## df$alcfreq: 1
## [1] 2.089286
## ------------------------------------------------------------ 
## df$alcfreq: 2
## [1] 1.637376
## ------------------------------------------------------------ 
## df$alcfreq: 3
## [1] 1.503906
## ------------------------------------------------------------ 
## df$alcfreq: 4
## [1] 1.57803
## ------------------------------------------------------------ 
## df$alcfreq: 5
## [1] 1.59375
## ------------------------------------------------------------ 
## df$alcfreq: 6
## [1] 1.727896
## ------------------------------------------------------------ 
## df$alcfreq: 7
## [1] 1.676006

# One-way analysis of means across the alcfreq groups. As the printed title
# states, this version does not assume equal variances. It tests the null
# hypothesis that the mean depression score is the same in every group, so a
# small p-value indicates that at least two group means differ.
model1 = oneway.test(df$depres ~ df$alcfreq) 
model1

## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  df$depres and df$alcfreq
## F = 3.8929, num df = 6.000, denom df = 80.513, p-value = 0.001832

Topic: How to report simple frequency distributions for Likert scales?

library(likert)     # create basic Likert tables and plots

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.3.3

## Loading required package: xtable

## Warning: package 'xtable' was built under R version 4.3.3

library(kableExtra) # create formatted tables 
library (foreign)

LIKERT SCALE FREQUENCY TABLES

ESS questions “d20”, “d21”, “d22”, “d23”, “d24”, “d25”, “d26”, “d27”

straightforward approach: create Likert table and Likert plot

# Names of the eight emotional items as they appear in the ESS data
vnames = c("fltdpr", "flteeff", "slprl","wrhpp", "fltlnl", "enjlf", "fltsd","cldgng")
# Print the entries one by one to check the order of the items
vnames[1]

## [1] "fltdpr"

vnames[2]

## [1] "flteeff"

vnames[3]

## [1] "slprl"

vnames[4]

## [1] "wrhpp"

vnames[5]

## [1] "fltlnl"

vnames[6]

## [1] "enjlf"

vnames[7]

## [1] "fltsd"

vnames[8]

## [1] "cldgng"

# Data frame holding only those eight columns; used as input for likert() below
likert_df = df[,vnames]
# Column listing of the full data set, which by now also contains the derived
# columns d20-d27, depres and alcfreq_n
names(df)

##   [1] "name"      "essround"  "edition"   "proddate"  "idno"      "cntry"    
##   [7] "dweight"   "pspwght"   "pweight"   "anweight"  "nwspol"    "netusoft" 
##  [13] "netustm"   "ppltrst"   "pplfair"   "pplhlp"    "polintr"   "psppsgva" 
##  [19] "actrolga"  "psppipla"  "cptppola"  "trstprl"   "trstlgl"   "trstplc"  
##  [25] "trstplt"   "trstprt"   "trstep"    "trstun"    "vote"      "prtvtdat" 
##  [31] "prtvtebe"  "prtvtchr"  "prtvtccy"  "prtvtffi"  "prtvtffr"  "prtvgde1" 
##  [37] "prtvgde2"  "prtvtegr"  "prtvthhu"  "prtvteis"  "prtvteie"  "prtvteit" 
##  [43] "prtvclt1"  "prtvclt2"  "prtvclt3"  "prtvtinl"  "prtvtcno"  "prtvtfpl" 
##  [49] "prtvtept"  "prtvtbrs"  "prtvtesk"  "prtvtgsi"  "prtvtges"  "prtvtdse" 
##  [55] "prtvthch"  "prtvtdgb"  "contplt"   "donprty"   "badge"     "sgnptit"  
##  [61] "pbldmna"   "bctprd"    "pstplonl"  "volunfp"   "clsprty"   "prtcleat" 
##  [67] "prtclebe"  "prtclbhr"  "prtclccy"  "prtclgfi"  "prtclgfr"  "prtclgde" 
##  [73] "prtclegr"  "prtclihu"  "prtcleis"  "prtclfie"  "prtclfit"  "prtclclt" 
##  [79] "prtclhnl"  "prtclcno"  "prtcljpl"  "prtclgpt"  "prtclbrs"  "prtclesk" 
##  [85] "prtclgsi"  "prtclhes"  "prtcldse"  "prtclhch"  "prtcldgb"  "prtdgcl"  
##  [91] "lrscale"   "stflife"   "stfeco"    "stfgov"    "stfdem"    "stfedu"   
##  [97] "stfhlth"   "gincdif"   "freehms"   "hmsfmlsh"  "hmsacld"   "euftf"    
## [103] "lrnobed"   "loylead"   "imsmetn"   "imdfetn"   "impcntr"   "imbgeco"  
## [109] "imueclt"   "imwbcnt"   "happy"     "sclmeet"   "inprdsc"   "sclact"   
## [115] "crmvct"    "aesfdrk"   "health"    "hlthhmp"   "atchctr"   "atcherp"  
## [121] "rlgblg"    "rlgdnm"    "rlgdnbat"  "rlgdnacy"  "rlgdnafi"  "rlgdnade" 
## [127] "rlgdnagr"  "rlgdnhu"   "rlgdnais"  "rlgdnie"   "rlgdnlt"   "rlgdnanl" 
## [133] "rlgdnno"   "rlgdnapl"  "rlgdnapt"  "rlgdnrs"   "rlgdnask"  "rlgdnase" 
## [139] "rlgdnach"  "rlgdngb"   "rlgblge"   "rlgdnme"   "rlgdebat"  "rlgdeacy" 
## [145] "rlgdeafi"  "rlgdeade"  "rlgdeagr"  "rlgdehu"   "rlgdeais"  "rlgdeie"  
## [151] "rlgdelt"   "rlgdeanl"  "rlgdeno"   "rlgdeapl"  "rlgdeapt"  "rlgders"  
## [157] "rlgdeask"  "rlgdease"  "rlgdeach"  "rlgdegb"   "rlgdgr"    "rlgatnd"  
## [163] "pray"      "dscrgrp"   "dscrrce"   "dscrntn"   "dscrrlg"   "dscrlng"  
## [169] "dscretn"   "dscrage"   "dscrgnd"   "dscrsex"   "dscrdsb"   "dscroth"  
## [175] "dscrdk"    "dscrref"   "dscrnap"   "dscrna"    "ctzcntr"   "brncntr"  
## [181] "cntbrthd"  "livecnta"  "lnghom1"   "lnghom2"   "feethngr"  "facntr"   
## [187] "fbrncntc"  "mocntr"    "mbrncntc"  "ccnthum"   "ccrdprs"   "wrclmch"  
## [193] "admrclc"   "testjc34"  "testjc35"  "testjc36"  "testjc37"  "testjc38" 
## [199] "testjc39"  "testjc40"  "testjc41"  "testjc42"  "vteurmmb"  "vteubcmb" 
## [205] "ctrlife"   "etfruit"   "eatveg"    "dosprt"    "cgtsmok"   "alcfreq"  
## [211] "alcwkdy"   "alcwknd"   "icgndra"   "alcbnge"   "height"    "weighta"  
## [217] "dshltgp"   "dshltms"   "dshltnt"   "dshltref"  "dshltdk"   "dshltna"  
## [223] "medtrun"   "medtrnp"   "medtrnt"   "medtroc"   "medtrnl"   "medtrwl"  
## [229] "medtrnaa"  "medtroth"  "medtrnap"  "medtrref"  "medtrdk"   "medtrna"  
## [235] "medtrnu"   "hlpfmly"   "hlpfmhr"   "trhltacu"  "trhltacp"  "trhltcm"  
## [241] "trhltch"   "trhltos"   "trhltho"   "trhltht"   "trhlthy"   "trhltmt"  
## [247] "trhltpt"   "trhltre"   "trhltsh"   "trhltnt"   "trhltref"  "trhltdk"  
## [253] "trhltna"   "fltdpr"    "flteeff"   "slprl"     "wrhpp"     "fltlnl"   
## [259] "enjlf"     "fltsd"     "cldgng"    "hltprhc"   "hltprhb"   "hltprbp"  
## [265] "hltpral"   "hltprbn"   "hltprpa"   "hltprpf"   "hltprsd"   "hltprsc"  
## [271] "hltprsh"   "hltprdi"   "hltprnt"   "hltprref"  "hltprdk"   "hltprna"  
## [277] "hltphhc"   "hltphhb"   "hltphbp"   "hltphal"   "hltphbn"   "hltphpa"  
## [283] "hltphpf"   "hltphsd"   "hltphsc"   "hltphsh"   "hltphdi"   "hltphnt"  
## [289] "hltphnap"  "hltphref"  "hltphdk"   "hltphna"   "hltprca"   "cancfre"  
## [295] "cnfpplh"   "fnsdfml"   "jbexpvi"   "jbexpti"   "jbexpml"   "jbexpmc"  
## [301] "jbexpnt"   "jbexpnap"  "jbexpref"  "jbexpdk"   "jbexpna"   "jbexevl"  
## [307] "jbexevh"   "jbexevc"   "jbexera"   "jbexecp"   "jbexebs"   "jbexent"  
## [313] "jbexenap"  "jbexeref"  "jbexedk"   "jbexena"   "nobingnd"  "likrisk"  
## [319] "liklead"   "sothnds"   "actcomp"   "mascfel"   "femifel"   "impbemw"  
## [325] "trmedmw"   "trwrkmw"   "trplcmw"   "trmdcnt"   "trwkcnt"   "trplcnt"  
## [331] "eqwrkbg"   "eqpolbg"   "eqmgmbg"   "eqpaybg"   "eqparep"   "eqparlv"  
## [337] "freinsw"   "fineqpy"   "wsekpwr"   "weasoff"   "wlespdm"   "wexashr"  
## [343] "wprtbym"   "wbrgwrm"   "hhmmb"     "gndr"      "gndr2"     "gndr3"    
## [349] "gndr4"     "gndr5"     "gndr6"     "gndr7"     "gndr8"     "gndr9"    
## [355] "gndr10"    "gndr11"    "gndr12"    "yrbrn"     "agea"      "yrbrn2"   
## [361] "yrbrn3"    "yrbrn4"    "yrbrn5"    "yrbrn6"    "yrbrn7"    "yrbrn8"   
## [367] "yrbrn9"    "yrbrn10"   "yrbrn11"   "yrbrn12"   "rshipa2"   "rshipa3"  
## [373] "rshipa4"   "rshipa5"   "rshipa6"   "rshipa7"   "rshipa8"   "rshipa9"  
## [379] "rshipa10"  "rshipa11"  "rshipa12"  "rshpsts"   "rshpsgb"   "lvgptnea" 
## [385] "dvrcdeva"  "marsts"    "marstgb"   "maritalb"  "chldhhe"   "domicil"  
## [391] "paccmoro"  "paccdwlr"  "pacclift"  "paccnbsh"  "paccocrw"  "paccxhoc" 
## [397] "paccnois"  "paccinro"  "paccnt"    "paccref"   "paccdk"    "paccna"   
## [403] "edulvlb"   "eisced"    "edlveat"   "edlvebe"   "edlvehr"   "edlvgcy"  
## [409] "edlvdfi"   "edlvdfr"   "edudde1"   "educde2"   "edlvegr"   "edlvdahu" 
## [415] "edlvdis"   "edlvdie"   "edlvfit"   "edlvdlt"   "edlvenl"   "edlveno"  
## [421] "edlvipl"   "edlvept"   "edlvdrs"   "edlvdsk"   "edlvesi"   "edlvies"  
## [427] "edlvdse"   "edlvdch"   "educgb1"   "edubgb2"   "edagegb"   "eduyrs"   
## [433] "pdwrk"     "edctn"     "uempla"    "uempli"    "dsbld"     "rtrd"     
## [439] "cmsrv"     "hswrk"     "dngoth"    "dngref"    "dngdk"     "dngna"    
## [445] "mainact"   "mnactic"   "crpdwk"    "pdjobev"   "pdjobyr"   "emplrel"  
## [451] "emplno"    "wrkctra"   "estsz"     "jbspv"     "njbspv"    "wkdcorga" 
## [457] "iorgact"   "wkhct"     "wkhtot"    "nacer2"    "tporgwk"   "isco08"   
## [463] "wrkac6m"   "uemp3m"    "uemp12m"   "uemp5yr"   "mbtru"     "hincsrca" 
## [469] "hinctnta"  "hincfel"   "edulvlpb"  "eiscedp"   "edlvpfat"  "edlvpebe" 
## [475] "edlvpehr"  "edlvpgcy"  "edlvpdfi"  "edlvpdfr"  "edupdde1"  "edupcde2" 
## [481] "edlvpegr"  "edlvpdahu" "edlvpdis"  "edlvpdie"  "edlvpfit"  "edlvpdlt" 
## [487] "edlvpenl"  "edlvpeno"  "edlvphpl"  "edlvpept"  "edlvpdrs"  "edlvpdsk" 
## [493] "edlvpesi"  "edlvphes"  "edlvpdse"  "edlvpdch"  "edupcgb1"  "edupbgb2" 
## [499] "edagepgb"  "pdwrkp"    "edctnp"    "uemplap"   "uemplip"   "dsbldp"   
## [505] "rtrdp"     "cmsrvp"    "hswrkp"    "dngothp"   "dngdkp"    "dngnapp"  
## [511] "dngrefp"   "dngnap"    "mnactp"    "crpdwkp"   "isco08p"   "emprelp"  
## [517] "wkhtotp"   "edulvlfb"  "eiscedf"   "edlvfeat"  "edlvfebe"  "edlvfehr" 
## [523] "edlvfgcy"  "edlvfdfi"  "edlvfdfr"  "edufcde1"  "edufbde2"  "edlvfegr" 
## [529] "edlvfdahu" "edlvfdis"  "edlvfdie"  "edlvffit"  "edlvfdlt"  "edlvfenl" 
## [535] "edlvfeno"  "edlvfgpl"  "edlvfept"  "edlvfdrs"  "edlvfdsk"  "edlvfesi" 
## [541] "edlvfges"  "edlvfdse"  "edlvfdch"  "edufcgb1"  "edufbgb2"  "edagefgb" 
## [547] "emprf14"   "occf14b"   "edulvlmb"  "eiscedm"   "edlvmeat"  "edlvmebe" 
## [553] "edlvmehr"  "edlvmgcy"  "edlvmdfi"  "edlvmdfr"  "edumcde1"  "edumbde2" 
## [559] "edlvmegr"  "edlvmdahu" "edlvmdis"  "edlvmdie"  "edlvmfit"  "edlvmdlt" 
## [565] "edlvmenl"  "edlvmeno"  "edlvmgpl"  "edlvmept"  "edlvmdrs"  "edlvmdsk" 
## [571] "edlvmesi"  "edlvmges"  "edlvmdse"  "edlvmdch"  "edumcgb1"  "edumbgb2" 
## [577] "edagemgb"  "emprm14"   "occm14b"   "atncrse"   "anctrya1"  "anctrya2" 
## [583] "regunit"   "region"    "ipcrtiva"  "impricha"  "ipeqopta"  "ipshabta" 
## [589] "impsafea"  "impdiffa"  "ipfrulea"  "ipudrsta"  "ipmodsta"  "ipgdtima" 
## [595] "impfreea"  "iphlppla"  "ipsucesa"  "ipstrgva"  "ipadvnta"  "ipbhprpa" 
## [601] "iprspota"  "iplylfra"  "impenva"   "imptrada"  "impfuna"   "testji1"  
## [607] "testji2"   "testji3"   "testji4"   "testji5"   "testji6"   "testji7"  
## [613] "testji8"   "testji9"   "respc19a"  "symtc19"   "symtnc19"  "vacc19"   
## [619] "recon"     "inwds"     "ainws"     "ainwe"     "binwe"     "cinwe"    
## [625] "dinwe"     "einwe"     "finwe"     "hinwe"     "iinwe"     "kinwe"    
## [631] "rinwe"     "inwde"     "jinws"     "jinwe"     "inwtm"     "mode"     
## [637] "domain"    "prob"      "stratum"   "psu"       "d20"       "d21"      
## [643] "d22"       "d23"       "d24"       "d25"       "d26"       "d27"      
## [649] "depres"    "alcfreq_n"

# Confirm that the Likert input holds exactly the eight item columns
colnames(likert_df)

## [1] "fltdpr"  "flteeff" "slprl"   "wrhpp"   "fltlnl"  "enjlf"   "fltsd"  
## [8] "cldgng"

create basic frequencies

# Build the likert summary of the eight items; printing it shows, for each
# item, the percentage of responses in each answer category.
likert(likert_df) # create table directly from long format data

##      Item None or almost none of the time Some of the time Most of the time
## 1  fltdpr                       60.931900         32.97491         4.301075
## 2 flteeff                       60.501193         31.62291         5.131265
## 3   slprl                       37.231504         43.91408        13.723150
## 4   wrhpp                        2.747909         14.81481        49.342891
## 5  fltlnl                       75.478469         20.09569         2.751196
## 6   enjlf                        4.301075         20.07168        46.953405
## 7   fltsd                       59.139785         36.32019         3.106332
## 8  cldgng                       55.794504         34.05018         7.526882
##   All or almost all of the time
## 1                      1.792115
## 2                      2.744630
## 3                      5.131265
## 4                     33.094385
## 5                      1.674641
## 6                     28.673835
## 7                      1.433692
## 8                      2.628435

create basic plot

# Draw the plot for the same likert summary of the eight items
plot(likert(likert_df)) # create plot directly from long format data

Append mean and counts

# convert to numeric
## short version
# likert_numeric_df = as.data.frame(lapply((df[,vnames]), as.numeric))
## long version

# Long version: start from the item columns selected by vnames, then append
# one numeric copy per item, named d20 ... d27 in vnames order.
likert_numeric_df <- df[, vnames]
likert_numeric_df[paste0("d", 20:27)] <-
  lapply(likert_numeric_df[vnames[1:8]], as.numeric)

get means

short version

# Short version: item means, ignoring missing answers.
# Uses the numeric copies d20 ... d27; the columns selected by vnames are
# presumably the labelled response factors (they are passed to as.numeric()
# when the numeric copies are built), and mean() of a factor returns NA.
likert_means <- lapply(
  likert_numeric_df[paste0("d", 20:27)],
  mean,
  na.rm = TRUE
)
## long version

# Long version: one explicit mean per numeric item column, with missing
# answers ignored. The result is a named list (d20 ... d27).
likert_means <- list(
  d20 = mean(likert_numeric_df[["d20"]], na.rm = TRUE),
  d21 = mean(likert_numeric_df[["d21"]], na.rm = TRUE),
  d22 = mean(likert_numeric_df[["d22"]], na.rm = TRUE),
  d23 = mean(likert_numeric_df[["d23"]], na.rm = TRUE),
  d24 = mean(likert_numeric_df[["d24"]], na.rm = TRUE),
  d25 = mean(likert_numeric_df[["d25"]], na.rm = TRUE),
  d26 = mean(likert_numeric_df[["d26"]], na.rm = TRUE),
  d27 = mean(likert_numeric_df[["d27"]], na.rm = TRUE)
)
likert_means # show the item means; comment out if not needed

## $d20
## [1] 1.469534
## 
## $d21
## [1] 1.501193
## 
## $d22
## [1] 1.867542
## 
## $d23
## [1] 3.127838
## 
## $d24
## [1] 1.30622
## 
## $d25
## [1] 3
## 
## $d26
## [1] 1.468339
## 
## $d27
## [1] 1.569892

# Frequency table for the selected items, extended by a column of item means
tab <- likert(likert_df[, vnames])[["results"]]
tab[["Means"]] <- unlist(likert_means)
tab

##      Item None or almost none of the time Some of the time Most of the time
## 1  fltdpr                       60.931900         32.97491         4.301075
## 2 flteeff                       60.501193         31.62291         5.131265
## 3   slprl                       37.231504         43.91408        13.723150
## 4   wrhpp                        2.747909         14.81481        49.342891
## 5  fltlnl                       75.478469         20.09569         2.751196
## 6   enjlf                        4.301075         20.07168        46.953405
## 7   fltsd                       59.139785         36.32019         3.106332
## 8  cldgng                       55.794504         34.05018         7.526882
##   All or almost all of the time    Means
## 1                      1.792115 1.469534
## 2                      2.744630 1.501193
## 3                      5.131265 1.867542
## 4                     33.094385 3.127838
## 5                      1.674641 1.306220
## 6                     28.673835 3.000000
## 7                      1.433692 1.468339
## 8                      2.628435 1.569892

Append counts

get counts

short version

# Short version: number of non-missing answers per item, as a named list
likert_counts <- lapply(
  likert_numeric_df[, vnames],
  function(item) length(item) - sum(is.na(item))
)

long version

# Long version: one explicit count of non-missing answers per numeric item
# column. The result is a named list (d20 ... d27).
likert_counts <- list(
  d20 = sum(!is.na(likert_numeric_df[["d20"]])),
  d21 = sum(!is.na(likert_numeric_df[["d21"]])),
  d22 = sum(!is.na(likert_numeric_df[["d22"]])),
  d23 = sum(!is.na(likert_numeric_df[["d23"]])),
  d24 = sum(!is.na(likert_numeric_df[["d24"]])),
  d25 = sum(!is.na(likert_numeric_df[["d25"]])),
  d26 = sum(!is.na(likert_numeric_df[["d26"]])),
  d27 = sum(!is.na(likert_numeric_df[["d27"]]))
)

likert_counts # show the counts; comment out if not needed

## $d20
## [1] 837
## 
## $d21
## [1] 838
## 
## $d22
## [1] 838
## 
## $d23
## [1] 837
## 
## $d24
## [1] 836
## 
## $d25
## [1] 837
## 
## $d26
## [1] 837
## 
## $d27
## [1] 837

# Build the extended table: take the "results" data frame stored inside the
# likert object and add the item means and valid-answer counts as columns.
likert_table <- likert(likert_df)[["results"]]
likert_table[["Mean"]] <- unlist(likert_means)
likert_table[["Count"]] <- unlist(likert_counts)
likert_table # show the extended table; comment out if not needed

##      Item None or almost none of the time Some of the time Most of the time
## 1  fltdpr                       60.931900         32.97491         4.301075
## 2 flteeff                       60.501193         31.62291         5.131265
## 3   slprl                       37.231504         43.91408        13.723150
## 4   wrhpp                        2.747909         14.81481        49.342891
## 5  fltlnl                       75.478469         20.09569         2.751196
## 6   enjlf                        4.301075         20.07168        46.953405
## 7   fltsd                       59.139785         36.32019         3.106332
## 8  cldgng                       55.794504         34.05018         7.526882
##   All or almost all of the time     Mean Count
## 1                      1.792115 1.469534   837
## 2                      2.744630 1.501193   838
## 3                      5.131265 1.867542   838
## 4                     33.094385 3.127838   837
## 5                      1.674641 1.306220   836
## 6                     28.673835 3.000000   837
## 7                      1.433692 1.468339   837
## 8                      2.628435 1.569892   837

# Set new item labels (make sure order matches your vnames / CES-D8 scale)
likert_table$Item <- c(
  d20 = "Felt depressed",
  d21 = "Everything was an effort",
  d22 = "Had restless sleep",
  d23 = "Felt happy",
  d24 = "Felt lonely",
  d25 = "Enjoyed life",
  d26 = "Felt sad",
  d27 = "Had trouble getting going"
)

# Round the four percentage columns (2-5) to 1 decimal.
# Column 6 is Mean and column 7 is Count, so a 2:6 range would also cut the
# means down to 1 decimal and a positional [, 7] would address the counts.
likert_table[, 2:5] <- round(likert_table[, 2:5], 1)

# Round means to 3 decimals (addressed by name so the column cannot be missed)
likert_table$Mean <- round(likert_table$Mean, 3)

# Generate nicely formatted table
kable_styling(kable(likert_table, caption = "Distribution of responses to depression-related items(CES-D8,Iceland sample)"))

Distribution of responses to depression-related items(CES-D8,Iceland sample)
Item	None or almost none of the time	Some of the time	Most of the time	All or almost all of the time	Mean	Count
Felt depressed	60.9	33.0	4.3	1.8	1.470	837
Everything was an effort	60.5	31.6	5.1	2.7	1.501	838
Had restless sleep	37.2	43.9	13.7	5.1	1.868	838
Felt happy	2.7	14.8	49.3	33.1	3.128	837
Felt lonely	75.5	20.1	2.8	1.7	1.306	836
Enjoyed life	4.3	20.1	47.0	28.7	3.000	837
Felt sad	59.1	36.3	3.1	1.4	1.468	837
Had trouble getting going	55.8	34.1	7.5	2.6	1.570	837

# Create Likert plot from the item labels (column 1) and the four percentage
# columns (2-5) only. Column 6 holds the item mean, which is not a response
# category and must not be passed to likert(summary = ...).
plot(likert(summary = likert_table[, 1:5]))

table(df$cntry)

## 
##            Albania            Austria            Belgium           Bulgaria 
##                  0                  0                  0                  0 
##        Switzerland             Cyprus            Czechia            Germany 
##                  0                  0                  0                  0 
##            Denmark            Estonia              Spain            Finland 
##                  0                  0                  0                  0 
##             France     United Kingdom            Georgia             Greece 
##                  0                  0                  0                  0 
##            Croatia            Hungary            Ireland             Israel 
##                  0                  0                  0                  0 
##            Iceland              Italy          Lithuania         Luxembourg 
##                842                  0                  0                  0 
##             Latvia         Montenegro    North Macedonia        Netherlands 
##                  0                  0                  0                  0 
##             Norway             Poland           Portugal            Romania 
##                  0                  0                  0                  0 
##             Serbia Russian Federation             Sweden           Slovenia 
##                  0                  0                  0                  0 
##           Slovakia             Turkey            Ukraine             Kosovo 
##                  0                  0                  0                  0

unique(df$cntry)

## [1] Iceland
## 40 Levels: Albania Austria Belgium Bulgaria Switzerland Cyprus ... Kosovo

# Keep only the rows whose country is Iceland, then report the sample size
df_Iceland <- df[df[["cntry"]] == "Iceland", ]
dim(df_Iceland)[1]

## [1] 842

The Iceland sample consisted of `r nrow(df_Iceland)` respondents.

# Predictors of Clinically Significant Depression

### Creating a Binary Outcome Variable

We define clinically significant depression based on a CES-D score cutoff of 16, a widely used threshold in epidemiological studies. Scores of 16 or above are typically indicative of clinically relevant depressive symptoms.

# 1. Create the binary indicator for clinically significant depression
#    (1 = at or above the cutoff, 0 = below, NA where depres is missing).
# NOTE(review): the accompanying text cites a CES-D cutoff of 16 while this
# code uses 8, and the regression output further down (intercept ~2.07)
# suggests depres is a mean score — confirm the cutoff matches the scale.
df_Iceland$depres_binary <- ifelse(df_Iceland$depres >= 8, 1, 0)

# Check frequency. The column must be referenced by its full name,
# depres_binary; a misspelled name yields NULL and an empty table.
table(df_Iceland$depres_binary)

## (output omitted — re-run this chunk to regenerate)

prop.table(table(df_Iceland$depres_binary))  # relative frequencies

## (output omitted — re-run this chunk to regenerate)

# Coerce the covariates on the Iceland subset: gender as a two-level factor,
# education and self-rated health as numbers.
# NOTE(review): if eduyrs/health are factors, as.numeric() yields the level
# codes rather than the labelled values — confirm against the raw data.
df_Iceland[["gndr"]] <- factor(df_Iceland[["gndr"]], labels = c("Male", "Female"))
df_Iceland[["eduyrs"]] <- as.numeric(df_Iceland[["eduyrs"]])  # education
df_Iceland[["health"]] <- as.numeric(df_Iceland[["health"]])  # self-rated health

# Same recoding of gender and health on df, the data frame the model uses
df[["gndr"]] <- factor(df[["gndr"]], labels = c("Male", "Female"))
df[["health"]] <- as.numeric(df[["health"]])

# Weighted GLM of the depression score on sport, alcohol, education and
# income. No family is passed, so glm() falls back to its gaussian default:
# this fits a weighted linear model, not a logistic regression.
log_model <- glm(
  formula = depres ~ dosprt + alcfreq + eduyrs + hinctnta,
  data = df,
  weights = pspwght
)

summary(log_model)

## 
## Call:
## glm(formula = depres ~ dosprt + alcfreq + eduyrs + hinctnta, 
##     data = df, weights = pspwght)
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.0672170  0.1128196  18.323  < 2e-16 ***
## dosprt      -0.0298171  0.0079665  -3.743 0.000196 ***
## alcfreq      0.0176278  0.0109129   1.615 0.106661    
## eduyrs       0.0009354  0.0049878   0.188 0.851286    
## hinctnta    -0.0507387  0.0071362  -7.110  2.7e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.2530937)
## 
##     Null deviance: 209.99  on 757  degrees of freedom
## Residual deviance: 190.58  on 753  degrees of freedom
##   (84 observations deleted due to missingness)
## AIC: 1340.3
## 
## Number of Fisher Scoring iterations: 2

# NOTE(review): the model summary reports a gaussian family, so these
# exponentiated coefficients are not odds ratios; interpret coef(log_model)
# directly unless the model is refitted with a binomial family.
exp(coef(log_model))

## (Intercept)      dosprt     alcfreq      eduyrs    hinctnta 
##    7.902799    0.970623    1.017784    1.000936    0.950527

Interpretation

Frequent physical activity and higher income are significantly associated with lower depression scores, while alcohol use and education level show no significant effects after accounting for survey weights. Compared to unweighted linear regression, the weighted GLM provides more reliable population-level estimates and confirms the robustness of key associations.

Homework 4

2025-06-05

Statistical Analysis of ESS 11 Data

Introduction

Literature

Methods

Depression Scale Calculation and Reliability

Results

Bivariate Associations

Topic: How to report simple frequency distributions for Likert scales?

LIKERT SCALE FREQUENCY TABLES

ESS questions “d20”, “d21”, “d22”, “d23”, “d24”, “d25”, “d26”, “d27”

straightforward approach: create Likert table and Likert plot

create basic frequencies

create basic plot

Append mean and counts

get means

short version

Append counts

get counts

short version

# Short version: number of non-missing answers per item, as a named list
likert_counts <- lapply(
  likert_numeric_df[, vnames],
  function(item) length(item) - sum(is.na(item))
)

long version

Interpretation