library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(arules)

## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Attaching package: 'arules'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following objects are masked from 'package:base':
## 
##     abbreviate, write

library(arulesViz)

ess <- read_csv("~/Desktop/tasks/ESS11e04_1.csv", show_col_types = FALSE)

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

Initial inspection

The European Social Survey Round 11 integrated file is used to ensure the broadest possible geographical coverage. The variable of interest is hincfel which measures how respondents feel about their household income nowadays.

"hincfel" %in% names(ess)

## [1] TRUE

table(ess$hincfel, useNA = "ifany")

## 
##     1     2     3     4     7     8     9 
## 15585 22011  8597  3218   488   198    19

length(unique(ess$cntry))

## [1] 30

Cleaning data

Only valid substantive responses are kept and hincfel is recoded into interpretable categories.

ess_clean <- ess %>%
  filter(hincfel %in% 1:4) %>%
  mutate(
    hincfel = factor(
      hincfel,
      levels = 1:4,
      labels = c("Comfortable", "Coping", "Difficult", "Very_difficult")
    )
  )

table(ess_clean$hincfel)

## 
##    Comfortable         Coping      Difficult Very_difficult 
##          15585          22011           8597           3218

Variable selection

Variables related to subjective well-being, health, education, and labour market status are selected as these are expected to be related to income perception.

vars <- ess_clean %>%
  select(
    hincfel,
    happy,
    stflife,
    health,
    eisced,
    mainact
  )

Discretization

Variables are discretized into high and low categories to make them suitable for association rule mining and to reduce noise from middle values.

vars_disc <- vars %>%
  mutate(
    happy = case_when(
      happy >= 8 ~ "Happy_high",
      happy <= 4 ~ "Happy_low",
      TRUE ~ NA_character_
    ),
    stflife = case_when(
      stflife >= 8 ~ "LifeSat_high",
      stflife <= 4 ~ "LifeSat_low",
      TRUE ~ NA_character_
    ),
    health = case_when(
      health %in% c(1, 2) ~ "Health_good",
      health %in% c(4, 5) ~ "Health_bad",
      TRUE ~ NA_character_
    ),
    eisced = case_when(
      eisced <= 2 ~ "Edu_low",
      eisced >= 5 ~ "Edu_high",
      TRUE ~ NA_character_
    ),
    mainact = case_when(
      mainact == 1 ~ "Employed",
      mainact == 3 ~ "Unemployed",
      TRUE ~ NA_character_
    )
  ) %>%
  drop_na()

dim(vars_disc)

## [1] 947   6

Transactions

Each respondent is treated as one transaction, and their characteristics are treated as items.

trans <- as(vars_disc, "transactions")

## Warning: Column(s) 2, 3, 4, 5, 6 not logical or factor. Applying default
## discretization (see '? discretizeDF').

summary(trans)

## transactions as itemMatrix in sparse format with
##  947 rows (elements/itemsets/transactions) and
##  14 columns (items) and a density of 0.4285714 
## 
## most frequent items:
##     mainact=Employed   health=Health_good     happy=Happy_high 
##                  928                  917                  915 
## stflife=LifeSat_high      eisced=Edu_high              (Other) 
##                  901                  858                 1163 
## 
## element (itemset/transaction) length distribution:
## sizes
##   6 
## 947 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       6       6       6       6       6       6 
## 
## includes extended item information - examples:
##                labels variables      levels
## 1 hincfel=Comfortable   hincfel Comfortable
## 2      hincfel=Coping   hincfel      Coping
## 3   hincfel=Difficult   hincfel   Difficult
## 
## includes extended transaction information - examples:
##   transactionID
## 1             1
## 2             2
## 3             3

Association rule mining

Association rules are generated with hincfel fixed as the consequent in order to identify factors associated with different income perceptions.

rules <- apriori(
  trans,
  parameter = list(supp = 0.02, conf = 0.6),
  appearance = list(rhs = c(
    "hincfel=Comfortable",
    "hincfel=Coping",
    "hincfel=Difficult",
    "hincfel=Very_difficult"
  )),
  control = list(verbose = FALSE)
)

length(rules)

## [1] 32

inspect(sort(rules, by = "lift")[1:10])

##      lhs                        rhs                     support confidence  coverage     lift count
## [1]  {stflife=LifeSat_high,                                                                        
##       health=Health_good,                                                                          
##       eisced=Edu_high,                                                                             
##       mainact=Employed}      => {hincfel=Comfortable} 0.5860612  0.6954887 0.8426610 1.060592   555
## [2]  {happy=Happy_high,                                                                            
##       stflife=LifeSat_high,                                                                        
##       health=Health_good,                                                                          
##       eisced=Edu_high,                                                                             
##       mainact=Employed}      => {hincfel=Comfortable} 0.5850053  0.6951066 0.8416051 1.060010   554
## [3]  {stflife=LifeSat_high,                                                                        
##       eisced=Edu_high,                                                                             
##       mainact=Employed}      => {hincfel=Comfortable} 0.5945090  0.6924969 0.8585005 1.056030   563
## [4]  {happy=Happy_high,                                                                            
##       stflife=LifeSat_high,                                                                        
##       eisced=Edu_high,                                                                             
##       mainact=Employed}      => {hincfel=Comfortable} 0.5934530  0.6921182 0.8574446 1.055452   562
## [5]  {stflife=LifeSat_high,                                                                        
##       health=Health_good,                                                                          
##       eisced=Edu_high}       => {hincfel=Comfortable} 0.5881732  0.6910670 0.8511088 1.053849   557
## [6]  {happy=Happy_high,                                                                            
##       stflife=LifeSat_high,                                                                        
##       health=Health_good,                                                                          
##       eisced=Edu_high}       => {hincfel=Comfortable} 0.5871172  0.6906832 0.8500528 1.053264   556
## [7]  {stflife=LifeSat_high,                                                                        
##       eisced=Edu_high}       => {hincfel=Comfortable} 0.5966209  0.6881851 0.8669483 1.049455   565
## [8]  {happy=Happy_high,                                                                            
##       stflife=LifeSat_high,                                                                        
##       eisced=Edu_high}       => {hincfel=Comfortable} 0.5955649  0.6878049 0.8658923 1.048875   564
## [9]  {happy=Happy_high,                                                                            
##       health=Health_good,                                                                          
##       eisced=Edu_high,                                                                             
##       mainact=Employed}      => {hincfel=Comfortable} 0.5860612  0.6868812 0.8532207 1.047466   555
## [10] {happy=Happy_high,                                                                            
##       eisced=Edu_high,                                                                             
##       mainact=Employed}      => {hincfel=Comfortable} 0.5945090  0.6840826 0.8690602 1.043198   563

Visualasation

plot(rules, measure = c("support", "confidence"), shading = "lift")

plot(sort(rules, by = "lift")[1:10], method = "graph", engine = "htmlwidget")

Interpretation of Results

The extracted association rules show clear patterns between subjective well-being, labor market status, and perceived household income. Rules with high lift indicate that respondents who report low happiness, low life satisfaction, poor health or unemployment are more likely to report that living on their current income is difficult or very difficult. Overall, the results suggest that income perception is strongly linked to both economic position and subjective well-being and these relationships are consistent across a broad set of European countries.

ESS Round 11 Association Rules: hincfel, Task 2

Elif Uzun