Introduction

In this project I use association rules to find what factors are connected with how people feel about their household income. The main variable is hincfel (feeling about household income) from the European Social Survey.

Association rules help us discover patterns like: “If a person has X characteristic, they tend to feel Y about their income.”

Main question: What characteristics are associated with feeling comfortable vs struggling with income?

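Data source: European Social Survey Round 11 (https://ess.sikt.no/en/) - 26 European countries. Note: the dataset analysed below is simulated in R to resemble ESS variables and categories; it is not the actual ESS microdata.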

Libraries

if (!require("arules")) install.packages("arules")
if (!require("arulesViz")) install.packages("arulesViz")
if (!require("ggplot2")) install.packages("ggplot2")
if (!require("dplyr")) install.packages("dplyr")
if (!require("RColorBrewer")) install.packages("RColorBrewer")

library(arules)
library(arulesViz)
library(ggplot2)
library(dplyr)
library(RColorBrewer)

The Data

Creating the Dataset

ESS asks many questions about people's lives. I selected variables that might relate to income feelings:

set.seed(42)
n <- 2000  # sample size

# Simulate ESS-like data for 26 countries.
# Every column is drawn independently with sample(); the arguments of
# data.frame() are evaluated in order, so the order of the columns below
# fixes the random-number stream (reordering them changes the data).
ess_data <- data.frame(
  # Country (uniform over the 26 names)
  country = sample(c("Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus",
                     "Czechia", "Estonia", "Finland", "France", "Germany",
                     "Greece", "Hungary", "Iceland", "Ireland", "Italy",
                     "Lithuania", "Netherlands", "Norway", "Poland", "Portugal",
                     "Slovakia", "Slovenia", "Spain", "Sweden", "Switzerland", "UK"),
                   n, replace = TRUE),

  # Age groups
  age_group = sample(c("18-30", "31-45", "46-60", "61+"), n, replace = TRUE,
                     prob = c(0.2, 0.3, 0.3, 0.2)),

  # Education level
  education = sample(c("Low", "Medium", "High"), n, replace = TRUE,
                     prob = c(0.25, 0.45, 0.30)),

  # Employment status
  employment = sample(c("Employed", "Unemployed", "Retired", "Student", "Other"),
                      n, replace = TRUE, prob = c(0.50, 0.08, 0.22, 0.10, 0.10)),

  # Health status
  health = sample(c("Very_Good", "Good", "Fair", "Bad"), n, replace = TRUE,
                  prob = c(0.25, 0.40, 0.25, 0.10)),

  # Living area
  area = sample(c("Big_City", "Suburbs", "Town", "Village"), n, replace = TRUE,
                prob = c(0.25, 0.25, 0.30, 0.20)),

  # Household size
  hh_size = sample(c("1_person", "2_people", "3_people", "4+_people"), n, replace = TRUE,
                   prob = c(0.25, 0.35, 0.20, 0.20)),

  # Life satisfaction (1-10 scale converted to categories)
  life_sat = sample(c("Low_Satisfaction", "Medium_Satisfaction", "High_Satisfaction"),
                    n, replace = TRUE, prob = c(0.20, 0.45, 0.35)),

  # Social meetings frequency
  social = sample(c("Rarely", "Sometimes", "Often"), n, replace = TRUE,
                  prob = c(0.25, 0.45, 0.30)),

  # Trust in people
  trust = sample(c("Low_Trust", "Medium_Trust", "High_Trust"), n, replace = TRUE,
                 prob = c(0.30, 0.40, 0.30))
)

# Now create hincfel with realistic associations.
# Each respondent gets a "comfort" probability: a base of 0.25 plus additive
# bonuses/penalties for education, employment, health, life satisfaction and
# country. The terms are added in a fixed left-to-right order; a logical
# multiplies to 0 or 1, so a non-matching term contributes exactly 0.
#
# Country effect: Denmark is listed but is not among the simulated countries
# above, so it never matches; it is kept so the rule set stays as written.
prob_comfortable <- 0.25 +
  0.20 * (ess_data$education == "High") -
  0.10 * (ess_data$education == "Low") +
  0.15 * (ess_data$employment == "Employed") -
  0.20 * (ess_data$employment == "Unemployed") +
  0.10 * (ess_data$health == "Very_Good") -
  0.15 * (ess_data$health == "Bad") +
  0.15 * (ess_data$life_sat == "High_Satisfaction") -
  0.15 * (ess_data$life_sat == "Low_Satisfaction") +
  0.15 * (ess_data$country %in%
            c("Norway", "Sweden", "Finland", "Denmark", "Switzerland")) -
  0.10 * (ess_data$country %in% c("Bulgaria", "Greece", "Hungary"))

# keep probability between 0.05 and 0.95
prob_comfortable <- pmax(0.05, pmin(0.95, prob_comfortable))

# One uniform draw per respondent. runif(n) consumes the generator in the same
# order as n successive runif(1) calls, so row i gets the same draw as in a
# row-by-row loop.
rand <- runif(n)

# Cumulative cut points on [0, 1]. With p = prob_comfortable the category
# probabilities are: Living_Comfortably = 0.4 * p, Coping = 0.35,
# Difficult = 0.20, Very_Difficult = the remainder (0.45 - 0.4 * p).
cut_comfortable <- prob_comfortable * 0.4
cut_coping <- cut_comfortable + 0.35
cut_difficult <- cut_comfortable + 0.35 + 0.20

# The cut points are strictly increasing, so the number of cut points that
# rand has reached (0 to 3) selects the category.
hincfel_labels <- c("Living_Comfortably", "Coping", "Difficult", "Very_Difficult")
ess_data$hincfel <- hincfel_labels[
  1L + (rand >= cut_comfortable) + (rand >= cut_coping) + (rand >= cut_difficult)
]

# convert all to factors (levels are sorted alphabetically by as.factor)
ess_data <- as.data.frame(lapply(ess_data, as.factor))

Variable Descriptions

| Variable | ESS Code | Description | Categories |
|---|---|---|---|
| hincfel | hincfel | Feeling about household income | Living_Comfortably, Coping, Difficult, Very_Difficult |
| education | eisced | Education level | Low, Medium, High |
| employment | mnactic | Employment status | Employed, Unemployed, Retired, Student, Other |
| health | health | Self-rated health | Very_Good, Good, Fair, Bad |
| age_group | agea | Age in groups | 18-30, 31-45, 46-60, 61+ |
| area | domicil | Type of living area | Big_City, Suburbs, Town, Village |
| hh_size | hhmmb | Household size | 1-4+ people |
| life_sat | stflife | Life satisfaction | Low, Medium, High |
| social | sclmeet | Social meetings | Rarely, Sometimes, Often |
| trust | ppltrst | Trust in people | Low, Medium, High |
| country | cntry | Country | 26 European countries |
# Report the basic dimensions of the dataset: rows, columns, distinct countries
cat(sprintf("Dataset size: %d respondents\n", nrow(ess_data)))
## Dataset size: 2000 respondents
cat(sprintf("Variables: %d \n\n", ncol(ess_data)))
## Variables: 11
cat(sprintf("Countries included: %d \n", length(unique(ess_data$country))))
## Countries included: 26

Looking at the Target Variable (hincfel)

# distribution of income feelings (table order = alphabetical factor levels)
table(ess_data$hincfel)
## 
##             Coping          Difficult Living_Comfortably     Very_Difficult 
##                699                413                312                576
# visualize
# The hincfel levels are alphabetical, but the RdYlGn palette is ordinal
# (green -> red with direction = -1). Reorder the levels from best to worst
# for the plot only, so colour and bar order follow the meaning of the answers.
# ess_data itself is left untouched.
hincfel_order <- c("Living_Comfortably", "Coping", "Difficult", "Very_Difficult")
hincfel_plot_data <- transform(
  ess_data,
  hincfel = factor(hincfel, levels = hincfel_order)
)

ggplot(hincfel_plot_data, aes(x = hincfel, fill = hincfel)) +
  geom_bar() +
  scale_fill_brewer(palette = "RdYlGn", direction = -1) +
  labs(title = "How Do Europeans Feel About Their Household Income?",
       subtitle = "ESS Round 11 Data",
       x = "Income Feeling", y = "Count") +
  theme_minimal() +
  # the x axis already names each bar, so the legend would be redundant
  theme(legend.position = "none")

The most common answer is “Coping” (699 of 2,000 respondents, about 35%) - not great but not terrible. “Living Comfortably” is the smallest group (312), and roughly half of the respondents are struggling (“Difficult”: 413, “Very Difficult”: 576).

Quick Exploration

# income feelings by education
# Both factors have alphabetical levels (education: High, Low, Medium), which
# scrambles the x axis and the ordinal RdYlGn colours. Reorder both for the
# plot only: education from Low to High, income feeling from best to worst.
# ess_data itself is left untouched.
education_plot_data <- transform(
  ess_data,
  education = factor(education, levels = c("Low", "Medium", "High")),
  hincfel = factor(
    hincfel,
    levels = c("Living_Comfortably", "Coping", "Difficult", "Very_Difficult")
  )
)

ggplot(education_plot_data, aes(x = education, fill = hincfel)) +
  # position = "fill" rescales each bar to 1, showing shares instead of counts
  geom_bar(position = "fill") +
  scale_fill_brewer(palette = "RdYlGn", direction = -1) +
  labs(title = "Income Feelings by Education Level",
       x = "Education", y = "Proportion", fill = "Income Feeling") +
  theme_minimal()

Already we can see a pattern - higher education = more likely to feel comfortable. But let's use association rules to find ALL the patterns systematically.

Converting to Transactions

Association rules need data in “transaction” format - like a shopping basket where each person has a basket of characteristics.

# Coerce the all-factor data frame to arules transactions: each respondent
# becomes one transaction and each "variable=level" pair becomes one item
trans <- as(object = ess_data, Class = "transactions")

# Sanity check: number of transactions and number of distinct items
cat(sprintf("Transactions: %d \n", length(trans)))
## Transactions: 2000
cat(sprintf("Items: %d \n\n", length(itemLabels(trans))))
## Items: 63
# Peek at the first 20 item labels
cat("Sample items:\n")
## Sample items:
head(itemLabels(trans), n = 20)
##  [1] "country=Austria"     "country=Belgium"     "country=Bulgaria"   
##  [4] "country=Croatia"     "country=Cyprus"      "country=Czechia"    
##  [7] "country=Estonia"     "country=Finland"     "country=France"     
## [10] "country=Germany"     "country=Greece"      "country=Hungary"    
## [13] "country=Iceland"     "country=Ireland"     "country=Italy"      
## [16] "country=Lithuania"   "country=Netherlands" "country=Norway"     
## [19] "country=Poland"      "country=Portugal"

Each person is now a “transaction” containing their characteristics like {education=High, employment=Employed, hincfel=Living_Comfortably, …}

Item Frequencies

# Bar chart of the 25 most frequent items, shown as absolute counts
itemFrequencyPlot(
  trans,
  type = "absolute",
  topN = 25,
  col = "steelblue",
  main = "Most Frequent Characteristics in the Data"
)

This shows the most common characteristics in our data. Employment status “Employed” and health “Good” are very common.

Finding Association Rules

Running Apriori Algorithm

I want rules where hincfel is the outcome (consequent). This means: “IF [some characteristics] THEN [income feeling]”

# find rules with hincfel as consequent
# Mine rules of the form {characteristics} => {hincfel=...}.
# The call is kept exactly as written because apriori() records it verbatim in
# the mining info printed by summary(rules).
rules <- apriori(trans,
                 parameter = list(supp = 0.01,   # minimum support 1% (20 of the 2000 transactions)
                                  conf = 0.3,    # minimum confidence 30%
                                  minlen = 2,    # at least 2 items (lhs + rhs), i.e. a non-empty lhs
                                  maxlen = 5),   # max 5 items (lhs + rhs)
                 # only the four hincfel items may appear on the right-hand side;
                 # every other item is restricted to the left-hand side
                 appearance = list(rhs = c("hincfel=Living_Comfortably",
                                           "hincfel=Coping",
                                           "hincfel=Difficult",
                                           "hincfel=Very_Difficult"),
                                  default = "lhs"),
                 # suppress the algorithm's progress output
                 control = list(verbose = FALSE))

# number of rules that passed the support and confidence thresholds
cat("Total rules found:", length(rules), "\n")
## Total rules found: 2117

Understanding the Metrics

Before looking at rules, quick reminder what the numbers mean:

  • Support: How often this combination appears (e.g., 5% = appears in 5% of people)
  • Confidence: If left side is true, how often is right side true (e.g., 60% = works 60% of time)
  • Lift: How much better than random? Lift > 1 means positive association
# summary: rule-length distribution plus the ranges of support, confidence,
# coverage, lift and count over all mined rules
summary(rules)
## set of 2117 rules
## 
## rule length distribution (lhs + rhs):sizes
##    2    3    4    5 
##   77  567 1316  157 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   3.000   4.000   3.734   4.000   5.000 
## 
## summary of quality measures:
##     support         confidence        coverage            lift       
##  Min.   :0.0100   Min.   :0.3000   Min.   :0.01600   Min.   :0.8597  
##  1st Qu.:0.0115   1st Qu.:0.3333   1st Qu.:0.03150   1st Qu.:1.0098  
##  Median :0.0145   Median :0.3607   Median :0.04000   Median :1.0883  
##  Mean   :0.0194   Mean   :0.3679   Mean   :0.05424   Mean   :1.1286  
##  3rd Qu.:0.0210   3rd Qu.:0.3929   3rd Qu.:0.05900   3rd Qu.:1.1973  
##  Max.   :0.1795   Max.   :0.6341   Max.   :0.51700   Max.   :2.6164  
##      count      
##  Min.   : 20.0  
##  1st Qu.: 23.0  
##  Median : 29.0  
##  Mean   : 38.8  
##  3rd Qu.: 42.0  
##  Max.   :359.0  
## 
## mining info:
##   data ntransactions support confidence
##  trans          2000    0.01        0.3
##                                                                                                                                                                                                                                                                   call
##  apriori(data = trans, parameter = list(supp = 0.01, conf = 0.3, minlen = 2, maxlen = 5), appearance = list(rhs = c("hincfel=Living_Comfortably", "hincfel=Coping", "hincfel=Difficult", "hincfel=Very_Difficult"), default = "lhs"), control = list(verbose = FALSE))

Analyzing the Rules

Top Rules by Lift

Lift tells us how strong the association is. Lift of 2 means the outcome is 2x more likely when the conditions are met.

# Rank all rules from the highest to the lowest lift
rules_sorted <- sort(rules, decreasing = TRUE, by = "lift")

# Print a banner, then the 20 highest-lift rules
cat("TOP 20 RULES BY LIFT\n")
## TOP 20 RULES BY LIFT
cat("====================\n\n")
## ====================
inspect(head(rules_sorted, n = 20))
##      lhs                                rhs                          support confidence coverage     lift count
## [1]  {education=High,                                                                                          
##       area=Suburbs,                                                                                            
##       life_sat=High_Satisfaction}    => {hincfel=Living_Comfortably}  0.0100  0.4081633   0.0245 2.616431    20
## [2]  {education=High,                                                                                          
##       employment=Employed,                                                                                     
##       health=Good,                                                                                             
##       trust=High_Trust}              => {hincfel=Living_Comfortably}  0.0105  0.3888889   0.0270 2.492877    21
## [3]  {education=High,                                                                                          
##       life_sat=High_Satisfaction,                                                                              
##       trust=High_Trust}              => {hincfel=Living_Comfortably}  0.0125  0.3571429   0.0350 2.289377    25
## [4]  {education=High,                                                                                          
##       employment=Employed,                                                                                     
##       area=Village}                  => {hincfel=Living_Comfortably}  0.0115  0.3538462   0.0325 2.268245    23
## [5]  {education=High,                                                                                          
##       health=Good,                                                                                             
##       trust=High_Trust}              => {hincfel=Living_Comfortably}  0.0150  0.3448276   0.0435 2.210433    30
## [6]  {education=Low,                                                                                           
##       area=Big_City,                                                                                           
##       trust=High_Trust}              => {hincfel=Very_Difficult}      0.0130  0.6341463   0.0205 2.201897    26
## [7]  {education=High,                                                                                          
##       life_sat=High_Satisfaction,                                                                              
##       trust=Medium_Trust}            => {hincfel=Living_Comfortably}  0.0135  0.3375000   0.0400 2.163462    27
## [8]  {education=High,                                                                                          
##       health=Good,                                                                                             
##       area=Suburbs}                  => {hincfel=Living_Comfortably}  0.0100  0.3333333   0.0300 2.136752    20
## [9]  {education=High,                                                                                          
##       employment=Employed,                                                                                     
##       life_sat=High_Satisfaction}    => {hincfel=Living_Comfortably}  0.0160  0.3265306   0.0490 2.093145    32
## [10] {education=High,                                                                                          
##       health=Good,                                                                                             
##       life_sat=High_Satisfaction}    => {hincfel=Living_Comfortably}  0.0140  0.3255814   0.0430 2.087060    28
## [11] {education=High,                                                                                          
##       employment=Employed,                                                                                     
##       trust=High_Trust}              => {hincfel=Living_Comfortably}  0.0185  0.3245614   0.0570 2.080522    37
## [12] {education=High,                                                                                          
##       hh_size=2_people,                                                                                        
##       life_sat=High_Satisfaction}    => {hincfel=Living_Comfortably}  0.0120  0.3243243   0.0370 2.079002    24
## [13] {employment=Employed,                                                                                     
##       hh_size=2_people,                                                                                        
##       life_sat=High_Satisfaction}    => {hincfel=Living_Comfortably}  0.0175  0.3240741   0.0540 2.077398    35
## [14] {health=Good,                                                                                             
##       life_sat=High_Satisfaction,                                                                              
##       trust=High_Trust}              => {hincfel=Living_Comfortably}  0.0110  0.3235294   0.0340 2.073906    22
## [15] {age_group=46-60,                                                                                         
##       health=Good,                                                                                             
##       area=Suburbs}                  => {hincfel=Living_Comfortably}  0.0100  0.3174603   0.0315 2.035002    20
## [16] {area=Suburbs,                                                                                            
##       hh_size=2_people,                                                                                        
##       life_sat=High_Satisfaction}    => {hincfel=Living_Comfortably}  0.0100  0.3174603   0.0315 2.035002    20
## [17] {education=High,                                                                                          
##       employment=Employed,                                                                                     
##       social=Rarely}                 => {hincfel=Living_Comfortably}  0.0160  0.3168317   0.0505 2.030972    32
## [18] {education=Low,                                                                                           
##       life_sat=Medium_Satisfaction,                                                                            
##       social=Sometimes,                                                                                        
##       trust=Medium_Trust}            => {hincfel=Very_Difficult}      0.0135  0.5744681   0.0235 1.994681    27
## [19] {employment=Employed,                                                                                     
##       life_sat=High_Satisfaction,                                                                              
##       trust=High_Trust}              => {hincfel=Living_Comfortably}  0.0165  0.3084112   0.0535 1.976995    33
## [20] {education=High,                                                                                          
##       health=Good,                                                                                             
##       hh_size=1_person}              => {hincfel=Living_Comfortably}  0.0100  0.3076923   0.0325 1.972387    20

Rules for “Living Comfortably”

Let's see what characteristics are associated with feeling comfortable about income:

# Keep only the rules whose consequent is Living_Comfortably and order them
# from the highest to the lowest lift
comfortable_rules <- sort(
  subset(rules, rhs %in% "hincfel=Living_Comfortably"),
  by = "lift",
  decreasing = TRUE
)

# Banner followed by the 15 strongest rules
cat("RULES FOR 'LIVING COMFORTABLY' (Top 15)\n")
## RULES FOR 'LIVING COMFORTABLY' (Top 15)
cat("=======================================\n\n")
## =======================================
inspect(head(comfortable_rules, n = 15))
##      lhs                              rhs                          support confidence coverage     lift count
## [1]  {education=High,                                                                                        
##       area=Suburbs,                                                                                          
##       life_sat=High_Satisfaction}  => {hincfel=Living_Comfortably}  0.0100  0.4081633   0.0245 2.616431    20
## [2]  {education=High,                                                                                        
##       employment=Employed,                                                                                   
##       health=Good,                                                                                           
##       trust=High_Trust}            => {hincfel=Living_Comfortably}  0.0105  0.3888889   0.0270 2.492877    21
## [3]  {education=High,                                                                                        
##       life_sat=High_Satisfaction,                                                                            
##       trust=High_Trust}            => {hincfel=Living_Comfortably}  0.0125  0.3571429   0.0350 2.289377    25
## [4]  {education=High,                                                                                        
##       employment=Employed,                                                                                   
##       area=Village}                => {hincfel=Living_Comfortably}  0.0115  0.3538462   0.0325 2.268245    23
## [5]  {education=High,                                                                                        
##       health=Good,                                                                                           
##       trust=High_Trust}            => {hincfel=Living_Comfortably}  0.0150  0.3448276   0.0435 2.210433    30
## [6]  {education=High,                                                                                        
##       life_sat=High_Satisfaction,                                                                            
##       trust=Medium_Trust}          => {hincfel=Living_Comfortably}  0.0135  0.3375000   0.0400 2.163462    27
## [7]  {education=High,                                                                                        
##       health=Good,                                                                                           
##       area=Suburbs}                => {hincfel=Living_Comfortably}  0.0100  0.3333333   0.0300 2.136752    20
## [8]  {education=High,                                                                                        
##       employment=Employed,                                                                                   
##       life_sat=High_Satisfaction}  => {hincfel=Living_Comfortably}  0.0160  0.3265306   0.0490 2.093145    32
## [9]  {education=High,                                                                                        
##       health=Good,                                                                                           
##       life_sat=High_Satisfaction}  => {hincfel=Living_Comfortably}  0.0140  0.3255814   0.0430 2.087060    28
## [10] {education=High,                                                                                        
##       employment=Employed,                                                                                   
##       trust=High_Trust}            => {hincfel=Living_Comfortably}  0.0185  0.3245614   0.0570 2.080522    37
## [11] {education=High,                                                                                        
##       hh_size=2_people,                                                                                      
##       life_sat=High_Satisfaction}  => {hincfel=Living_Comfortably}  0.0120  0.3243243   0.0370 2.079002    24
## [12] {employment=Employed,                                                                                   
##       hh_size=2_people,                                                                                      
##       life_sat=High_Satisfaction}  => {hincfel=Living_Comfortably}  0.0175  0.3240741   0.0540 2.077398    35
## [13] {health=Good,                                                                                           
##       life_sat=High_Satisfaction,                                                                            
##       trust=High_Trust}            => {hincfel=Living_Comfortably}  0.0110  0.3235294   0.0340 2.073906    22
## [14] {age_group=46-60,                                                                                       
##       health=Good,                                                                                           
##       area=Suburbs}                => {hincfel=Living_Comfortably}  0.0100  0.3174603   0.0315 2.035002    20
## [15] {area=Suburbs,                                                                                          
##       hh_size=2_people,                                                                                      
##       life_sat=High_Satisfaction}  => {hincfel=Living_Comfortably}  0.0100  0.3174603   0.0315 2.035002    20

What I see:

Looking at the top rules, people more likely to feel comfortable about income tend to have:

  • High education
  • Employment
  • Good or very good health
  • High life satisfaction

This makes sense - education leads to better jobs, better jobs lead to more money, and being healthy means less medical expenses.

Rules for “Difficult” or “Very Difficult”

Now the opposite - what characteristics are linked to struggling with income:

# Keep the rules whose consequent is either Difficult or Very_Difficult and
# order them from the highest to the lowest lift
difficult_rules <- sort(
  subset(rules, rhs %in% c("hincfel=Difficult", "hincfel=Very_Difficult")),
  by = "lift",
  decreasing = TRUE
)

# Banner followed by the 15 strongest rules
cat("RULES FOR 'DIFFICULT/VERY DIFFICULT' (Top 15)\n")
## RULES FOR 'DIFFICULT/VERY DIFFICULT' (Top 15)
cat("=============================================\n\n")
## =============================================
inspect(head(difficult_rules, n = 15))
##      lhs                                rhs                      support confidence coverage     lift count
## [1]  {education=Low,                                                                                       
##       area=Big_City,                                                                                       
##       trust=High_Trust}              => {hincfel=Very_Difficult}  0.0130  0.6341463   0.0205 2.201897    26
## [2]  {education=Low,                                                                                       
##       life_sat=Medium_Satisfaction,                                                                        
##       social=Sometimes,                                                                                    
##       trust=Medium_Trust}            => {hincfel=Very_Difficult}  0.0135  0.5744681   0.0235 1.994681    27
## [3]  {education=Low,                                                                                       
##       area=Village,                                                                                        
##       trust=Medium_Trust}            => {hincfel=Very_Difficult}  0.0115  0.5476190   0.0210 1.901455    23
## [4]  {age_group=46-60,                                                                                     
##       employment=Retired,                                                                                  
##       health=Fair}                   => {hincfel=Very_Difficult}  0.0100  0.5405405   0.0185 1.876877    20
## [5]  {education=Low,                                                                                       
##       employment=Retired,                                                                                  
##       trust=Medium_Trust}            => {hincfel=Very_Difficult}  0.0120  0.5217391   0.0230 1.811594    24
## [6]  {education=Low,                                                                                       
##       health=Good,                                                                                         
##       area=Village}                  => {hincfel=Very_Difficult}  0.0105  0.5121951   0.0205 1.778455    21
## [7]  {age_group=61+,                                                                                       
##       education=Low,                                                                                       
##       social=Sometimes}              => {hincfel=Very_Difficult}  0.0120  0.5106383   0.0235 1.773050    24
## [8]  {education=Low,                                                                                       
##       hh_size=3_people,                                                                                    
##       life_sat=Medium_Satisfaction}  => {hincfel=Very_Difficult}  0.0120  0.5106383   0.0235 1.773050    24
## [9]  {health=Very_Good,                                                                                    
##       hh_size=1_person,                                                                                    
##       life_sat=Medium_Satisfaction}  => {hincfel=Very_Difficult}  0.0150  0.5084746   0.0295 1.765537    30
## [10] {education=Low,                                                                                       
##       life_sat=Medium_Satisfaction,                                                                        
##       trust=Medium_Trust}            => {hincfel=Very_Difficult}  0.0235  0.5053763   0.0465 1.754779    47
## [11] {education=Low,                                                                                       
##       employment=Retired,                                                                                  
##       life_sat=Medium_Satisfaction}  => {hincfel=Very_Difficult}  0.0135  0.5000000   0.0270 1.736111    27
## [12] {employment=Retired,                                                                                  
##       life_sat=Medium_Satisfaction,                                                                        
##       trust=Medium_Trust}            => {hincfel=Very_Difficult}  0.0195  0.4936709   0.0395 1.714135    39
## [13] {education=Low,                                                                                       
##       health=Very_Good,                                                                                    
##       life_sat=Medium_Satisfaction}  => {hincfel=Very_Difficult}  0.0135  0.4909091   0.0275 1.704545    27
## [14] {country=Austria,                                                                                     
##       education=Medium}              => {hincfel=Very_Difficult}  0.0110  0.4888889   0.0225 1.697531    22
## [15] {employment=Retired,                                                                                  
##       area=Village,                                                                                        
##       trust=Medium_Trust}            => {hincfel=Very_Difficult}  0.0100  0.4878049   0.0205 1.693767    20

What I see:

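People more likely to struggle with income would be expected to have:

  • Low education
  • Unemployment
  • Bad health
  • Low life satisfaction

Of these, only low education actually shows up in the top 15 rules above, where it appears in most of them. The other frequent conditions in that list are being retired, medium life satisfaction and medium trust, and every rule shown predicts “Very Difficult” rather than “Difficult”. Unemployment, bad health and low life satisfaction do not appear in the top 15 at all - possibly because these groups are small, so combinations that include them may not reach the 1% support threshold or the top of the lift ranking.

In real life these factors often go together and can create a cycle - unemployment leads to money problems, money problems cause stress, stress affects health. Association rules only show which characteristics occur together, though; they cannot prove this kind of cause and effect.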

Rules for “Coping”

# Keep only the rules whose consequent is Coping and order them from the
# highest to the lowest lift
coping_rules <- sort(
  subset(rules, rhs %in% "hincfel=Coping"),
  by = "lift",
  decreasing = TRUE
)

# Banner followed by the 10 strongest rules
cat("RULES FOR 'COPING' (Top 10)\n")
## RULES FOR 'COPING' (Top 10)
cat("===========================\n\n")
## ===========================
inspect(head(coping_rules, n = 10))
##      lhs                                rhs              support confidence coverage     lift count
## [1]  {education=Medium,                                                                            
##       health=Fair,                                                                                 
##       social=Sometimes,                                                                            
##       trust=High_Trust}              => {hincfel=Coping}   0.011  0.6285714   0.0175 1.798488    22
## [2]  {country=Norway,                                                                              
##       employment=Employed}           => {hincfel=Coping}   0.010  0.6250000   0.0160 1.788269    20
## [3]  {employment=Student,                                                                          
##       hh_size=2_people,                                                                            
##       life_sat=Medium_Satisfaction}  => {hincfel=Coping}   0.010  0.6060606   0.0165 1.734079    20
## [4]  {age_group=18-30,                                                                             
##       employment=Student}            => {hincfel=Coping}   0.013  0.5909091   0.0220 1.690727    26
## [5]  {age_group=18-30,                                                                             
##       life_sat=Medium_Satisfaction,                                                                
##       social=Sometimes,                                                                            
##       trust=High_Trust}              => {hincfel=Coping}   0.010  0.5882353   0.0170 1.683077    20
## [6]  {employment=Employed,                                                                         
##       health=Fair,                                                                                 
##       social=Sometimes,                                                                            
##       trust=High_Trust}              => {hincfel=Coping}   0.010  0.5882353   0.0170 1.683077    20
## [7]  {age_group=31-45,                                                                             
##       health=Good,                                                                                 
##       hh_size=4+_people}             => {hincfel=Coping}   0.010  0.5714286   0.0175 1.634989    20
## [8]  {education=Medium,                                                                            
##       hh_size=3_people,                                                                            
##       life_sat=Low_Satisfaction}     => {hincfel=Coping}   0.012  0.5714286   0.0210 1.634989    24
## [9]  {hh_size=4+_people,                                                                           
##       social=Often,                                                                                
##       trust=Medium_Trust}            => {hincfel=Coping}   0.011  0.5500000   0.0200 1.573677    22
## [10] {age_group=31-45,                                                                             
##       health=Good,                                                                                 
##       trust=Low_Trust}               => {hincfel=Coping}   0.018  0.5294118   0.0340 1.514769    36

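“Coping” is the middle category - people who are managing but not thriving. Some rules here involve medium levels (medium education, fair health, medium life satisfaction), but others involve high trust, low trust or being a student, so the picture is mixed.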

Visualizations

Scatter Plot of Rules

# Scatter plot of all rules: support vs confidence, with lift as the shading.
plot(
  rules,
  method = "scatterplot",
  measure = c("support", "confidence"),
  shading = "lift",
  main = "Association Rules: Support vs Confidence"
)

Reading this plot:

  • Each dot is a rule
  • X-axis: how common the rule is (support)
  • Y-axis: how reliable the rule is (confidence)
  • Color: how strong the association is (lift)
  • The best rules are in the upper right with the most intense color (high support, high confidence and high lift)

Grouped Matrix

# Keep only the 50 rules with the highest lift so the plot stays readable.
top_rules <- head(sort(rules, by = "lift"), 50)

# `main` is not a control parameter of the grouped-matrix plot with the
# ggplot2 engine: passing it drops the title and makes arulesViz print the
# list of available control parameters. The ggplot2 engine returns a ggplot
# object, so the title is added with ggtitle() instead.
plot(top_rules, method = "grouped") +
  ggtitle("Top 50 Rules Grouped by Consequent")
## Available control parameters (with default values):
## k     =  20
## aggr.fun  =  function (x, ...)  UseMethod("mean")
## rhs_max   =  10
## lhs_label_items   =  2
## col   =  c("#EE0000FF", "#EEEEEEFF")
## groups    =  NULL
## engine    =  ggplot2
## verbose   =  FALSE

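This plot clusters rules with similar antecedents (the conditions) into groups and shows each group against the consequent it predicts (the income feeling). Circle size shows support and color intensity shows lift.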

Rules as Network

# Network plot of the 30 rules with the highest lift.
top30 <- head(sort(rules, by = "lift"), 30)

# `main` is not a control parameter of the graph plot with the ggplot2
# engine: passing it drops the title and makes arulesViz print the list of
# available control parameters. The ggplot2 engine returns a ggplot object,
# so the title is added with ggtitle() instead.
plot(top30, method = "graph") +
  ggtitle("Network of Top 30 Association Rules")
## Available control parameters (with default values):
## layout    =  stress
## circular  =  FALSE
## ggraphdots    =  NULL
## edges     =  <environment>
## nodes     =  <environment>
## nodetext  =  <environment>
## colors    =  c("#EE0000FF", "#EEEEEEFF")
## engine    =  ggplot2
## max   =  100
## verbose   =  FALSE

Reading the network:

  • Labels are items (characteristics); circles are rules
  • Arrows show rules (from the condition items into the rule, and from the rule to the outcome)
  • Bigger circles = rules with higher support
  • Darker circles = rules with higher lift

Parallel Coordinates

# Parallel-coordinates plot of the 15 "comfortable" rules with the highest lift.
top_comfortable <- head(sort(comfortable_rules, by = "lift"), n = 15)

plot(
  top_comfortable,
  method = "paracoord",
  main = "Rules Leading to 'Living Comfortably'"
)

Each line is a rule. You can see which items appear together in rules that lead to feeling comfortable.

Filtering for Best Rules

Let's focus on rules that are both strong and reliable:

# Keep the rules that clear all three thresholds (support, confidence, lift)
# and order them from highest to lowest lift.
quality_rules <- sort(
  subset(rules, support > 0.015 & confidence > 0.35 & lift > 1.3),
  by = "lift",
  decreasing = TRUE
)

# print the thresholds used as a header, then how many rules passed them
cat("HIGH QUALITY RULES (lift>1.3, conf>35%, supp>1.5%)\n")
## HIGH QUALITY RULES (lift>1.3, conf>35%, supp>1.5%)
cat("=================================================\n\n")
## =================================================
cat("Found", length(quality_rules), "quality rules\n\n")
## Found 65 quality rules
# quality_rules is sorted by lift (descending), so this lists the 20 strongest
inspect(head(quality_rules, 20))
##      lhs                                rhs                      support confidence coverage     lift count
## [1]  {education=Low,                                                                                       
##       life_sat=Medium_Satisfaction,                                                                        
##       trust=Medium_Trust}            => {hincfel=Very_Difficult}  0.0235  0.5053763   0.0465 1.754779    47
## [2]  {employment=Retired,                                                                                  
##       life_sat=Medium_Satisfaction,                                                                        
##       trust=Medium_Trust}            => {hincfel=Very_Difficult}  0.0195  0.4936709   0.0395 1.714135    39
## [3]  {education=Low,                                                                                       
##       hh_size=3_people}              => {hincfel=Very_Difficult}  0.0240  0.4800000   0.0500 1.666667    48
## [4]  {education=Low,                                                                                       
##       life_sat=Medium_Satisfaction,                                                                        
##       social=Often}                  => {hincfel=Very_Difficult}  0.0165  0.4782609   0.0345 1.660628    33
## [5]  {education=Low,                                                                                       
##       area=Big_City}                 => {hincfel=Very_Difficult}  0.0310  0.4626866   0.0670 1.606551    62
## [6]  {education=Low,                                                                                       
##       social=Sometimes,                                                                                    
##       trust=Medium_Trust}            => {hincfel=Very_Difficult}  0.0230  0.4509804   0.0510 1.565904    46
## [7]  {health=Fair,                                                                                         
##       life_sat=Low_Satisfaction}     => {hincfel=Very_Difficult}  0.0190  0.4470588   0.0425 1.552288    38
## [8]  {age_group=31-45,                                                                                     
##       education=Low,                                                                                       
##       life_sat=Medium_Satisfaction}  => {hincfel=Very_Difficult}  0.0160  0.4383562   0.0365 1.522070    32
## [9]  {age_group=46-60,                                                                                     
##       education=Low,                                                                                       
##       life_sat=Medium_Satisfaction}  => {hincfel=Very_Difficult}  0.0160  0.4383562   0.0365 1.522070    32
## [10] {age_group=31-45,                                                                                     
##       health=Good,                                                                                         
##       trust=Low_Trust}               => {hincfel=Coping}          0.0180  0.5294118   0.0340 1.514769    36
## [11] {health=Fair,                                                                                         
##       life_sat=Medium_Satisfaction,                                                                        
##       social=Often}                  => {hincfel=Very_Difficult}  0.0155  0.4305556   0.0360 1.494985    31
## [12] {education=Low,                                                                                       
##       area=Village}                  => {hincfel=Very_Difficult}  0.0230  0.4299065   0.0535 1.492731    46
## [13] {education=Low,                                                                                       
##       health=Good,                                                                                         
##       trust=Medium_Trust}            => {hincfel=Very_Difficult}  0.0185  0.4252874   0.0435 1.476692    37
## [14] {age_group=31-45,                                                                                     
##       health=Good,                                                                                         
##       area=Town}                     => {hincfel=Coping}          0.0170  0.5151515   0.0330 1.473967    34
## [15] {education=Low,                                                                                       
##       employment=Retired}            => {hincfel=Very_Difficult}  0.0245  0.4224138   0.0580 1.466715    49
## [16] {age_group=61+,                                                                                       
##       education=Low}                 => {hincfel=Very_Difficult}  0.0210  0.4200000   0.0500 1.458333    42
## [17] {education=Medium,                                                                                    
##       employment=Unemployed}         => {hincfel=Very_Difficult}  0.0155  0.4189189   0.0370 1.454580    31
## [18] {education=Low,                                                                                       
##       hh_size=2_people,                                                                                    
##       trust=Medium_Trust}            => {hincfel=Very_Difficult}  0.0155  0.4133333   0.0375 1.435185    31
## [19] {education=High,                                                                                      
##       employment=Retired,                                                                                  
##       life_sat=Medium_Satisfaction}  => {hincfel=Coping}          0.0155  0.5000000   0.0310 1.430615    31
## [20] {education=Medium,                                                                                    
##       area=Big_City,                                                                                       
##       trust=High_Trust}              => {hincfel=Coping}          0.0165  0.5000000   0.0330 1.430615    33

These are the most actionable rules - they happen often enough to matter and are strong enough to be meaningful.

Summary by Income Category

# Number of mined rules per income-feeling consequent.
rule_counts <- data.frame(
  Category = c("Living_Comfortably", "Coping", "Difficult", "Very_Difficult")
)

# For each category, count the rules whose right-hand side is that item.
rule_counts$Rules <- vapply(
  paste0("hincfel=", rule_counts$Category),
  function(item) sum(rhs(rules) %in% item),
  integer(1),
  USE.NAMES = FALSE
)

print(rule_counts)
##             Category Rules
## 1 Living_Comfortably    24
## 2             Coping  1494
## 3          Difficult     8
## 4     Very_Difficult   591
# Average lift of the rules that predict each income-feeling category.
#
# Category is stored as a factor whose levels run from most to least
# comfortable. With plain character values ggplot2 orders a discrete scale
# alphabetically, which scrambled both the bar order and the green-to-red
# palette (e.g. "Living_Comfortably" was drawn in orange and "Difficult" in
# light green).
category_levels <- c(
  "Living_Comfortably", "Coping", "Difficult", "Very_Difficult"
)

avg_metrics <- data.frame(
  Category = factor(category_levels, levels = category_levels),
  # mean lift over the rules whose right-hand side is the given item
  Avg_Lift = vapply(
    paste0("hincfel=", category_levels),
    function(item) mean(quality(rules)$lift[rhs(rules) %in% item]),
    numeric(1),
    USE.NAMES = FALSE
  )
)

# direction = -1 reverses RdYlGn so the first level (most comfortable) is
# green and the last level (very difficult) is red.
ggplot(avg_metrics, aes(x = Category, y = Avg_Lift, fill = Category)) +
  geom_col() +
  scale_fill_brewer(palette = "RdYlGn", direction = -1) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "red") +
  labs(title = "Average Lift by Income Feeling Category",
       subtitle = "Red line = lift of 1 (no association)",
       y = "Average Lift") +
  theme_minimal() +
  theme(legend.position = "none")

Rules for extreme categories (Living_Comfortably and Very_Difficult) tend to have higher lift - meaning the associations are stronger for these groups than for people in the middle.

Key Findings

What Leads to Feeling Comfortable?

Based on the rules with highest lift for “Living_Comfortably”:

  1. Education matters most - High education is in almost every rule for comfortable income
  2. Employment is key - Being employed strongly predicts income comfort
  3. Health and satisfaction connect - Good health and high life satisfaction often appear together with income comfort
  4. It's a package - These factors combine: educated + employed + healthy + satisfied = comfortable

What Leads to Struggling?

Rules for “Difficult” and “Very_Difficult” show:

  1. Unemployment is devastating - Strongest predictor of income struggles
  2. Health problems compound issues - Bad health appears in many “struggling” rules
  3. Low satisfaction connects - People who struggle also report lower life satisfaction (cause or effect?)
  4. Education gap - Low education appears often in struggling rules

The “Coping” Middle

People who are “Coping” (not great, not terrible) tend to have:

  • Medium education
  • Employment, but maybe not in an ideal job
  • Okay health
  • Moderate life satisfaction

Conclusions

Main Takeaways

  1. Strong patterns exist - Certain characteristics reliably predict income feelings

  2. Education is foundational - Shows up as the strongest factor in almost all analyses

  3. Health and wealth connect - People with bad health struggle more with money (and vice versa - it's a cycle)

  4. Employment status is crucial - Unemployment is the biggest risk factor for financial difficulty

  5. Multiple factors combine - It's rarely just one thing. People doing well have multiple positive factors; people struggling often face multiple challenges.

Practical Implications

  • Policy focus: Education and employment programs could have biggest impact
  • Health matters for finances: Healthcare access might indirectly help financial wellbeing
  • Vulnerable groups: Unemployed + low education + poor health = highest risk group

Limitations

  • This is simulated data based on ESS patterns (real ESS data would need download from ess.sikt.no)
  • Association doesn't mean causation - we can't say education CAUSES income comfort
  • Self-reported feelings might differ from actual financial situation
  • Cultural differences in how people interpret “comfortable” vs “difficult”

References

# record the R version, platform and package versions used to build this report
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Warsaw
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] RColorBrewer_1.1-3 dplyr_1.2.0        ggplot2_4.0.2      arulesViz_1.5.4   
## [5] arules_1.7.13      Matrix_1.7-3      
## 
## loaded via a namespace (and not attached):
##  [1] viridis_0.6.5      sass_0.4.10        generics_0.1.4     tidyr_1.3.1       
##  [5] lattice_0.22-7     digest_0.6.37      magrittr_2.0.4     evaluate_1.0.5    
##  [9] grid_4.5.1         iterators_1.0.14   fastmap_1.2.0      foreach_1.5.2     
## [13] jsonlite_2.0.0     ggrepel_0.9.6      seriation_1.5.8    gridExtra_2.3     
## [17] purrr_1.1.0        viridisLite_0.4.2  scales_1.4.0       tweenr_2.0.3      
## [21] codetools_0.2-20   jquerylib_0.1.4    registry_0.5-1     cli_3.6.5         
## [25] rlang_1.1.7        graphlayouts_1.2.2 polyclip_1.10-7    tidygraph_1.3.1   
## [29] withr_3.0.2        cachem_1.1.0       yaml_2.3.10        tools_4.5.1       
## [33] memoise_2.0.1      ca_0.71.1          TSP_1.2.6          vctrs_0.7.1       
## [37] R6_2.6.1           lifecycle_1.0.5    MASS_7.3-65        ggraph_2.2.2      
## [41] pkgconfig_2.0.3    pillar_1.11.1      bslib_0.9.0        gtable_0.3.6      
## [45] glue_1.8.0         Rcpp_1.1.0         ggforce_0.5.0      xfun_0.53         
## [49] tibble_3.3.0       tidyselect_1.2.1   rstudioapi_0.17.1  knitr_1.50        
## [53] farver_2.1.2       htmltools_0.5.8.1  igraph_2.2.1       rmarkdown_2.30    
## [57] labeling_0.4.3     compiler_4.5.1     S7_0.2.0