1 Introduction

This analysis explores association rules in the European Social Survey (Round 11) data, with a focus on predicting feelings about household income (hincfel) based on other sociodemographic variables.

2 Load Required Libraries

# Install packages if needed
required_packages <- c("haven", "dplyr", "arules", "arulesViz", "ggplot2", "tidyr")

for(pkg in required_packages) {
  if(!require(pkg, character.only = TRUE)) {
    install.packages(pkg)
    library(pkg, character.only = TRUE)
  }
}

3 Load and Prepare Data

# Load the data
data <- read.csv("/Users/bayu/Desktop/STUDY/1st Year/Unsupervised Learning/Homework/Association Rules/ESS11e04_1/ESS11e04_1.csv", stringsAsFactors = FALSE)

# Display dataset dimensions
cat("Dataset dimensions:", dim(data)[1], "rows,", dim(data)[2], "columns\n")
## Dataset dimensions: 50116 rows, 691 columns

4 Data Preparation for Association Rules

# Select relevant variables for analysis
# Including sociodemographic and attitudinal variables
selected_vars <- data %>%
  select(
    cntry,        # Country - for geographical coverage
    hincfel,      # Feeling about household income (TARGET)
    gndr,         # Gender
    agea,         # Age
    eduyrs,       # Years of education
    emplrel,      # Employment relation
    hinctnta,     # Household income decile
    health,       # Subjective general health
    happy,        # How happy are you
    stflife,      # How satisfied with life
    stfeco,       # How satisfied with state of economy
    stfgov        # How satisfied with government
  ) %>%
  # Remove rows with missing target variable
  filter(!is.na(hincfel))

# Check geographical coverage
cat("\nCountries in dataset:\n")
## 
## Countries in dataset:
table(selected_vars$cntry)
## 
##   AT   BE   BG   CH   CY   DE   EE   ES   FI   FR   GB   GR   HR   HU   IE   IL 
## 2354 1594 2239 1384  685 2420 1293 1844 1563 1771 1684 2757 1563 2118 2017  906 
##   IS   IT   LT   LV   ME   NL   NO   PL   PT   RS   SE   SI   SK   UA 
##  842 2865 1365 1252 1609 1695 1337 1442 1373 1563 1230 1248 1442 2661
cat("\nNumber of countries:", length(unique(selected_vars$cntry)), "\n")
## 
## Number of countries: 30

5 Inspect Target Variable: hincfel

# Examine the distribution of hincfel
cat("Distribution of 'hincfel' (Feeling about household income):\n")
## Distribution of 'hincfel' (Feeling about household income):
table(selected_vars$hincfel, useNA = "ifany")
## 
##     1     2     3     4     7     8     9 
## 15585 22011  8597  3218   488   198    19
# Visualize distribution
ggplot(selected_vars %>% filter(!is.na(hincfel)), 
       aes(x = factor(hincfel))) +
  geom_bar(fill = "steelblue", alpha = 0.8) +
  labs(title = "Distribution of Feelings about Household Income",
       x = "Income Feeling (1=Living comfortably, 4=Very difficult)",
       y = "Count") +
  theme_minimal()

6 Create Categorical Variables for Association Rules

# Create categorical bins for continuous variables and label all variables
rules_data <- selected_vars %>%
  mutate(
    # Categorize age
    age_group = cut(agea, 
                    breaks = c(0, 30, 50, 70, Inf),
                    labels = c("Young", "Middle", "Senior", "Elderly"),
                    include.lowest = TRUE),
    
    # Categorize education years
    education = cut(eduyrs,
                    breaks = c(0, 10, 15, Inf),
                    labels = c("Low_edu", "Medium_edu", "High_edu"),
                    include.lowest = TRUE),
    
    # Categorize satisfaction variables
    life_satisfaction = cut(stflife,
                           breaks = c(0, 5, 7, 10),
                           labels = c("Low_stflife", "Medium_stflife", "High_stflife"),
                           include.lowest = TRUE),
    
    economy_satisfaction = cut(stfeco,
                              breaks = c(0, 5, 7, 10),
                              labels = c("Low_stfeco", "Medium_stfeco", "High_stfeco"),
                              include.lowest = TRUE),
    
    # Label target variable clearly
    income_feeling = case_when(
      hincfel == 1 ~ "Comfortable",
      hincfel == 2 ~ "Coping",
      hincfel == 3 ~ "Difficult",
      hincfel == 4 ~ "Very_difficult",
      TRUE ~ NA_character_
    ),
    
    # Label gender
    gender = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      TRUE ~ NA_character_
    ),
    
    # Label health
    health_status = case_when(
      health <= 2 ~ "Good_health",
      health == 3 ~ "Fair_health",
      health >= 4 ~ "Poor_health",
      TRUE ~ NA_character_
    ),
    
    # Categorize happiness
    happiness = case_when(
      happy <= 5 ~ "Low_happy",
      happy <= 7 ~ "Medium_happy",
      happy > 7 ~ "High_happy",
      TRUE ~ NA_character_
    )
  ) %>%
  select(income_feeling, age_group, education, gender, health_status, 
         happiness, life_satisfaction, economy_satisfaction) %>%
  na.omit()  # Remove rows with any missing values

cat("Prepared data dimensions:", nrow(rules_data), "rows\n")
## Prepared data dimensions: 48316 rows
cat("First few rows:\n")
## First few rows:
head(rules_data)

7 Convert to Transaction Format

# Convert to transaction format for arules
transactions <- as(rules_data, "transactions")

# Inspect transactions
cat("Transaction summary:\n")
## Transaction summary:
summary(transactions)
## transactions as itemMatrix in sparse format with
##  48316 rows (elements/itemsets/transactions) and
##  25 columns (items) and a density of 0.32 
## 
## most frequent items:
## economy_satisfaction=Low_stfeco       health_status=Good_health 
##                           31918                           31797 
##            happiness=High_happy                   gender=Female 
##                           26438                           25931 
##            education=Medium_edu                         (Other) 
##                           25361                          245083 
## 
## element (itemset/transaction) length distribution:
## sizes
##     8 
## 48316 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       8       8       8       8       8       8 
## 
## includes extended item information - examples:
##                       labels      variables      levels
## 1 income_feeling=Comfortable income_feeling Comfortable
## 2      income_feeling=Coping income_feeling      Coping
## 3   income_feeling=Difficult income_feeling   Difficult
## 
## includes extended transaction information - examples:
##   transactionID
## 1             1
## 2             2
## 3             3
# Visualize item frequencies
itemFrequencyPlot(transactions, topN = 15, type = "absolute",
                  col = "steelblue", 
                  main = "Top 15 Most Frequent Items")

8 Generate Association Rules with hincfel as Consequent

# Generate rules with income_feeling as consequent (right-hand side)
rules <- apriori(transactions,
                 parameter = list(
                   supp = 0.01,      # Minimum support: 1%
                   conf = 0.5,       # Minimum confidence: 50%
                   minlen = 2,       # Minimum rule length
                   maxlen = 5        # Maximum rule length
                 ),
                 appearance = list(
                   rhs = c("income_feeling=Comfortable",
                          "income_feeling=Coping", 
                          "income_feeling=Difficult",
                          "income_feeling=Very_difficult"),
                   default = "lhs"   # All other items on left-hand side
                 ))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.5    0.1    1 none FALSE            TRUE       5    0.01      2
##  maxlen target  ext
##       5  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 483 
## 
## set item appearances ...[4 item(s)] done [0.00s].
## set transactions ...[25 item(s), 48316 transaction(s)] done [0.01s].
## sorting and recoding items ... [25 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5
##  done [0.00s].
## writing ... [484 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
cat("Number of rules generated:", length(rules), "\n")
## Number of rules generated: 484

9 Analyze and Interpret Rules

# Sort rules by confidence
rules_sorted <- sort(rules, by = "confidence", decreasing = TRUE)

# Display top 20 rules
cat("\nTop 20 Rules by Confidence:\n")
## 
## Top 20 Rules by Confidence:
inspect(head(rules_sorted, 20))
##      lhs                                     rhs                             support confidence   coverage     lift count
## [1]  {education=High_edu,                                                                                                
##       gender=Male,                                                                                                       
##       happiness=High_happy,                                                                                              
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01057621  0.7187060 0.01471562 2.266645   511
## [2]  {education=High_edu,                                                                                                
##       gender=Male,                                                                                                       
##       life_satisfaction=High_stflife,                                                                                    
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01053481  0.7169014 0.01469493 2.260954   509
## [3]  {education=High_edu,                                                                                                
##       health_status=Good_health,                                                                                         
##       life_satisfaction=High_stflife,                                                                                    
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01558490  0.7043966 0.02212518 2.221516   753
## [4]  {education=High_edu,                                                                                                
##       gender=Male,                                                                                                       
##       health_status=Good_health,                                                                                         
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01012087  0.7015782 0.01442586 2.212627   489
## [5]  {education=High_edu,                                                                                                
##       health_status=Good_health,                                                                                         
##       happiness=High_happy,                                                                                              
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01541932  0.7015066 0.02198030 2.212402   745
## [6]  {education=High_edu,                                                                                                
##       happiness=High_happy,                                                                                              
##       life_satisfaction=High_stflife,                                                                                    
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01666115  0.6945643 0.02398791 2.190507   805
## [7]  {age_group=Senior,                                                                                                  
##       education=High_edu,                                                                                                
##       gender=Male,                                                                                                       
##       life_satisfaction=High_stflife}     => {income_feeling=Comfortable} 0.01661975  0.6916451 0.02402931 2.181301   803
## [8]  {education=High_edu,                                                                                                
##       happiness=High_happy,                                                                                              
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01777879  0.6844622 0.02597483 2.158647   859
## [9]  {age_group=Senior,                                                                                                  
##       education=High_edu,                                                                                                
##       life_satisfaction=High_stflife,                                                                                    
##       economy_satisfaction=Medium_stfeco} => {income_feeling=Comfortable} 0.01239755  0.6837900 0.01813064 2.156527   599
## [10] {education=High_edu,                                                                                                
##       life_satisfaction=High_stflife,                                                                                    
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01782018  0.6827914 0.02609901 2.153378   861
## [11] {education=High_edu,                                                                                                
##       gender=Male,                                                                                                       
##       life_satisfaction=High_stflife,                                                                                    
##       economy_satisfaction=Medium_stfeco} => {income_feeling=Comfortable} 0.01823413  0.6824167 0.02671993 2.152196   881
## [12] {education=High_edu,                                                                                                
##       gender=Male,                                                                                                       
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01192152  0.6784452 0.01757182 2.139671   576
## [13] {age_group=Senior,                                                                                                  
##       education=High_edu,                                                                                                
##       happiness=High_happy,                                                                                              
##       economy_satisfaction=Medium_stfeco} => {income_feeling=Comfortable} 0.01291498  0.6760563 0.01910340 2.132137   624
## [14] {education=High_edu,                                                                                                
##       health_status=Good_health,                                                                                         
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01713718  0.6715328 0.02551950 2.117871   828
## [15] {education=High_edu,                                                                                                
##       gender=Male,                                                                                                       
##       happiness=High_happy,                                                                                              
##       economy_satisfaction=Medium_stfeco} => {income_feeling=Comfortable} 0.01860667  0.6674091 0.02787896 2.104865   899
## [16] {age_group=Senior,                                                                                                  
##       education=High_edu,                                                                                                
##       gender=Male,                                                                                                       
##       happiness=High_happy}               => {income_feeling=Comfortable} 0.01724067  0.6669335 0.02585065 2.103366   833
## [17] {age_group=Elderly,                                                                                                 
##       education=High_edu,                                                                                                
##       happiness=High_happy,                                                                                              
##       life_satisfaction=High_stflife}     => {income_feeling=Comfortable} 0.01057621  0.6636364 0.01593675 2.092967   511
## [18] {age_group=Senior,                                                                                                  
##       education=High_edu,                                                                                                
##       health_status=Good_health,                                                                                         
##       life_satisfaction=High_stflife}     => {income_feeling=Comfortable} 0.02692690  0.6627611 0.04062836 2.090207  1301
## [19] {education=High_edu,                                                                                                
##       gender=Male,                                                                                                       
##       health_status=Good_health,                                                                                         
##       life_satisfaction=High_stflife}     => {income_feeling=Comfortable} 0.04228413  0.6603103 0.06403676 2.082477  2043
## [20] {age_group=Senior,                                                                                                  
##       education=High_edu,                                                                                                
##       health_status=Good_health,                                                                                         
##       economy_satisfaction=Medium_stfeco} => {income_feeling=Comfortable} 0.01289428  0.6585624 0.01957944 2.076965   623
# Get summary statistics
cat("\nRule Quality Measures Summary:\n")
## 
## Rule Quality Measures Summary:
summary(quality(rules_sorted))
##     support          confidence        coverage            lift      
##  Min.   :0.01004   Min.   :0.5000   Min.   :0.01443   Min.   :1.121  
##  1st Qu.:0.01480   1st Qu.:0.5145   1st Qu.:0.02654   1st Qu.:1.163  
##  Median :0.02139   Median :0.5303   Median :0.03992   Median :1.224  
##  Mean   :0.02860   Mean   :0.5440   Mean   :0.05321   Mean   :1.425  
##  3rd Qu.:0.03518   3rd Qu.:0.5563   3rd Qu.:0.06539   3rd Qu.:1.705  
##  Max.   :0.16742   Max.   :0.7187   Max.   :0.32718   Max.   :2.267  
##      count     
##  Min.   : 485  
##  1st Qu.: 715  
##  Median :1034  
##  Mean   :1382  
##  3rd Qu.:1700  
##  Max.   :8089

9.1 Rules for Each Income Feeling Category

# Rules predicting "Comfortable"
rules_comfortable <- subset(rules, subset = rhs %in% "income_feeling=Comfortable")
cat("\nTop 10 Rules for 'Comfortable' Income Feeling:\n")
## 
## Top 10 Rules for 'Comfortable' Income Feeling:
inspect(head(sort(rules_comfortable, by = "confidence"), 10))
##      lhs                                     rhs                             support confidence   coverage     lift count
## [1]  {education=High_edu,                                                                                                
##       gender=Male,                                                                                                       
##       happiness=High_happy,                                                                                              
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01057621  0.7187060 0.01471562 2.266645   511
## [2]  {education=High_edu,                                                                                                
##       gender=Male,                                                                                                       
##       life_satisfaction=High_stflife,                                                                                    
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01053481  0.7169014 0.01469493 2.260954   509
## [3]  {education=High_edu,                                                                                                
##       health_status=Good_health,                                                                                         
##       life_satisfaction=High_stflife,                                                                                    
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01558490  0.7043966 0.02212518 2.221516   753
## [4]  {education=High_edu,                                                                                                
##       gender=Male,                                                                                                       
##       health_status=Good_health,                                                                                         
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01012087  0.7015782 0.01442586 2.212627   489
## [5]  {education=High_edu,                                                                                                
##       health_status=Good_health,                                                                                         
##       happiness=High_happy,                                                                                              
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01541932  0.7015066 0.02198030 2.212402   745
## [6]  {education=High_edu,                                                                                                
##       happiness=High_happy,                                                                                              
##       life_satisfaction=High_stflife,                                                                                    
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01666115  0.6945643 0.02398791 2.190507   805
## [7]  {age_group=Senior,                                                                                                  
##       education=High_edu,                                                                                                
##       gender=Male,                                                                                                       
##       life_satisfaction=High_stflife}     => {income_feeling=Comfortable} 0.01661975  0.6916451 0.02402931 2.181301   803
## [8]  {education=High_edu,                                                                                                
##       happiness=High_happy,                                                                                              
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01777879  0.6844622 0.02597483 2.158647   859
## [9]  {age_group=Senior,                                                                                                  
##       education=High_edu,                                                                                                
##       life_satisfaction=High_stflife,                                                                                    
##       economy_satisfaction=Medium_stfeco} => {income_feeling=Comfortable} 0.01239755  0.6837900 0.01813064 2.156527   599
## [10] {education=High_edu,                                                                                                
##       life_satisfaction=High_stflife,                                                                                    
##       economy_satisfaction=High_stfeco}   => {income_feeling=Comfortable} 0.01782018  0.6827914 0.02609901 2.153378   861
# Rules predicting "Difficult" or "Very_difficult"
rules_difficult <- subset(rules, subset = rhs %in% c("income_feeling=Difficult", 
                                                      "income_feeling=Very_difficult"))
cat("\nTop 10 Rules for 'Difficult' Income Feelings:\n")
## 
## Top 10 Rules for 'Difficult' Income Feelings:
inspect(head(sort(rules_difficult, by = "confidence"), 10))

10 Key Insights and Interpretation

# Extract top rules for interpretation
top_comfortable <- head(sort(rules_comfortable, by = "lift"), 5)
top_difficult <- head(sort(rules_difficult, by = "lift"), 5)

cat("\n=== KEY FINDINGS ===\n\n")
## 
## === KEY FINDINGS ===
cat("Strong Predictors of COMFORTABLE Income Feeling:\n")
## Strong Predictors of COMFORTABLE Income Feeling:
if(length(top_comfortable) > 0) {
  inspect(top_comfortable)
} else {
  cat("No strong rules found.\n")
}
##     lhs                                   rhs                             support confidence   coverage     lift count
## [1] {education=High_edu,                                                                                              
##      gender=Male,                                                                                                     
##      happiness=High_happy,                                                                                            
##      economy_satisfaction=High_stfeco} => {income_feeling=Comfortable} 0.01057621  0.7187060 0.01471562 2.266645   511
## [2] {education=High_edu,                                                                                              
##      gender=Male,                                                                                                     
##      life_satisfaction=High_stflife,                                                                                  
##      economy_satisfaction=High_stfeco} => {income_feeling=Comfortable} 0.01053481  0.7169014 0.01469493 2.260954   509
## [3] {education=High_edu,                                                                                              
##      health_status=Good_health,                                                                                       
##      life_satisfaction=High_stflife,                                                                                  
##      economy_satisfaction=High_stfeco} => {income_feeling=Comfortable} 0.01558490  0.7043966 0.02212518 2.221516   753
## [4] {education=High_edu,                                                                                              
##      gender=Male,                                                                                                     
##      health_status=Good_health,                                                                                       
##      economy_satisfaction=High_stfeco} => {income_feeling=Comfortable} 0.01012087  0.7015782 0.01442586 2.212627   489
## [5] {education=High_edu,                                                                                              
##      health_status=Good_health,                                                                                       
##      happiness=High_happy,                                                                                            
##      economy_satisfaction=High_stfeco} => {income_feeling=Comfortable} 0.01541932  0.7015066 0.02198030 2.212402   745
cat("\n\nStrong Predictors of DIFFICULT Income Feeling:\n")
## 
## 
## Strong Predictors of DIFFICULT Income Feeling:
if(length(top_difficult) > 0) {
  inspect(top_difficult)
} else {
  cat("No strong rules found.\n")
}
## No strong rules found.

11 Visualizations

11.1 Scatter Plot: Support vs Confidence

plot(rules, 
     measure = c("support", "confidence"), 
     shading = "lift",
     main = "Association Rules: Support vs Confidence (colored by Lift)")

11.2 Interactive Network Graph (Top Rules)

# Visualize top 30 rules as network
if(length(rules) >= 30) {
  plot(head(rules_sorted, 30), 
       method = "graph",
       main = "Network Graph: Top 30 Association Rules",
       control = list(type = "items"))
} else {
  plot(rules_sorted, 
       method = "graph",
       main = "Network Graph: All Association Rules",
       control = list(type = "items"))
}
## Available control parameters (with default values):
## layout    =  stress
## circular  =  FALSE
## ggraphdots    =  NULL
## edges     =  <environment>
## nodes     =  <environment>
## nodetext  =  <environment>
## colors    =  c("#EE0000FF", "#EEEEEEFF")
## engine    =  ggplot2
## max   =  100
## verbose   =  FALSE

11.3 Grouped Matrix Visualization

# Group rules and visualize as matrix
if(length(rules) > 0) {
  plot(rules_sorted, 
       method = "grouped",
       main = "Grouped Matrix of Association Rules")
}
## Available control parameters (with default values):
## k     =  20
## aggr.fun  =  function (x, ...)  UseMethod("mean")
## rhs_max   =  10
## lhs_label_items   =  2
## col   =  c("#EE0000FF", "#EEEEEEFF")
## groups    =  NULL
## engine    =  ggplot2
## verbose   =  FALSE

11.4 Top Rules by Lift

# Visualize top 15 rules by lift
top_15_rules <- head(sort(rules, by = "lift"), 15)

plot(top_15_rules, 
     method = "paracoord",
     main = "Parallel Coordinates Plot: Top 15 Rules by Lift")

12 Summary and Conclusions

cat("\n=== ANALYSIS SUMMARY ===\n\n")
## 
## === ANALYSIS SUMMARY ===
cat("Total respondents analyzed:", nrow(rules_data), "\n")
## Total respondents analyzed: 48316
cat("Total association rules found:", length(rules), "\n")
## Total association rules found: 484
cat("Rules predicting 'Comfortable':", length(rules_comfortable), "\n")
## Rules predicting 'Comfortable': 191
cat("Rules predicting 'Difficult' feelings:", length(rules_difficult), "\n\n")
## Rules predicting 'Difficult' feelings: 0
cat("Key Metrics:\n")
## Key Metrics:
cat("- Average Support:", round(mean(quality(rules)$support), 4), "\n")
## - Average Support: 0.0286
cat("- Average Confidence:", round(mean(quality(rules)$confidence), 4), "\n")
## - Average Confidence: 0.544
cat("- Average Lift:", round(mean(quality(rules)$lift), 4), "\n\n")
## - Average Lift: 1.4253

The association rules reveal patterns between sociodemographic factors and feelings about household income. High confidence rules indicate strong predictive relationships. Rules with high lift show combinations of factors that are particularly associated with specific income feelings, beyond what would be expected by chance.

13 Export Rules

# Save rules to CSV for further analysis
rules_df <- as(rules_sorted, "data.frame")
write.csv(rules_df, "association_rules_output.csv", row.names = FALSE)