library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Attaching package: 'arules'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
# Load the ESS extract without printing the column specification.
# The read reports parsing issues; inspect them with problems(ess).
ess <- read_csv(
  file = "~/Desktop/tasks/ESS11e04_1.csv",
  show_col_types = FALSE
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
The European Social Survey (ESS) Round 11 integrated file is used to ensure the broadest possible geographical coverage. The variable of interest is hincfel, which measures how respondents feel about their household income nowadays.
# Confirm the income-perception column exists before it is used.
is.element("hincfel", colnames(ess))
## [1] TRUE
# Raw code frequencies, with a separate cell for NA if any are present.
table(ess[["hincfel"]], useNA = "ifany")
##
## 1 2 3 4 7 8 9
## 15585 22011 8597 3218 488 198 19
# Number of distinct country codes in the file.
n_distinct(ess[["cntry"]])
## [1] 30
Only valid substantive responses (codes 1 to 4) are kept; the remaining codes 7, 8 and 9 seen in the table above are dropped. hincfel is then recoded into interpretable categories.
# Keep respondents whose hincfel code is 1-4 and turn the code into a labelled
# factor; every other code in the raw tabulation is filtered out.
ess_clean <- ess %>%
  filter(is.element(hincfel, 1:4)) %>%
  mutate(
    hincfel = factor(
      hincfel,
      levels = 1:4,
      labels = c("Comfortable", "Coping", "Difficult", "Very_difficult")
    )
  )

# Frequencies of the recoded outcome.
table(ess_clean[["hincfel"]])
##
## Comfortable Coping Difficult Very_difficult
## 15585 22011 8597 3218
Variables related to subjective well-being, health, education, and labour market status are selected as these are expected to be related to income perception.
# Outcome (hincfel) plus the five candidate correlates used to build items.
vars <- select(
  ess_clean,
  hincfel, happy, stflife, health, eisced, mainact
)
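Variables are discretized into high and low categories to make them suitable for association rule mining and to reduce noise from middle values. Respondents who fall into neither category on any one of the variables are then dropped entirely, so the sample used for mining is much smaller than the cleaned data set (see the dimensions printed below).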
# Collapse each predictor into a "high" or "low" item; everything else is NA.
#
# Every category is matched against a closed set of codes. Open-ended tests
# such as `happy >= 8` or `eisced >= 5` also match large numeric non-response
# codes (the raw hincfel tabulation shows this file stores such codes as
# numbers, not NA), which would label non-respondents as "high". Likewise
# `eisced <= 2` would match a code of 0.
# NOTE(review): the ranges assume 0-10 scales for happy/stflife and ES-ISCED
# codes 1-7 for eisced -- confirm against the ESS codebook for this extract.
vars_disc <- vars %>%
  mutate(
    happy = case_when(
      happy %in% 8:10 ~ "Happy_high",
      happy %in% 0:4 ~ "Happy_low",
      TRUE ~ NA_character_
    ),
    stflife = case_when(
      stflife %in% 8:10 ~ "LifeSat_high",
      stflife %in% 0:4 ~ "LifeSat_low",
      TRUE ~ NA_character_
    ),
    health = case_when(
      health %in% c(1, 2) ~ "Health_good",
      health %in% c(4, 5) ~ "Health_bad",
      TRUE ~ NA_character_
    ),
    eisced = case_when(
      eisced %in% 1:2 ~ "Edu_low",
      eisced %in% 5:7 ~ "Edu_high",
      TRUE ~ NA_character_
    ),
    # NOTE(review): every mainact code other than 1 and 3 becomes NA and the
    # respondent is dropped below -- confirm mainact is the intended
    # labour-status variable.
    mainact = case_when(
      mainact == 1 ~ "Employed",
      mainact == 3 ~ "Unemployed",
      TRUE ~ NA_character_
    )
  ) %>%
  # Keep only respondents classified as high or low on every variable.
  drop_na()

# Rows (respondents) and columns remaining after discretization.
dim(vars_disc)
## [1] 947 6
Each respondent is treated as one transaction, and their characteristics are treated as items.
# Build one transaction per respondent. The discretized columns are character
# vectors, which arules only accepts after warning and applying its default
# discretization; converting them to factors first makes the one-item-per-
# category encoding explicit and avoids that warning.
trans <- as(
  mutate(vars_disc, across(where(is.character), as.factor)),
  "transactions"
)
## Warning: Column(s) 2, 3, 4, 5, 6 not logical or factor. Applying default
## discretization (see '? discretizeDF').
# Item frequencies and transaction-length distribution of the encoded data.
summary(object = trans)
## transactions as itemMatrix in sparse format with
## 947 rows (elements/itemsets/transactions) and
## 14 columns (items) and a density of 0.4285714
##
## most frequent items:
## mainact=Employed health=Health_good happy=Happy_high
## 928 917 915
## stflife=LifeSat_high eisced=Edu_high (Other)
## 901 858 1163
##
## element (itemset/transaction) length distribution:
## sizes
## 6
## 947
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6 6 6 6 6 6
##
## includes extended item information - examples:
## labels variables levels
## 1 hincfel=Comfortable hincfel Comfortable
## 2 hincfel=Coping hincfel Coping
## 3 hincfel=Difficult hincfel Difficult
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 2
## 3 3
Association rules are generated with hincfel fixed as the consequent in order to identify factors associated with different income perceptions.
# Mine rules whose consequent is one of the hincfel categories.
#  - support / confidence are spelled out instead of relying on abbreviated
#    parameter names.
#  - minlen = 2 excludes the empty-antecedent rule {} => {hincfel=...}, which
#    only restates the marginal share of a category (lift 1) and would
#    otherwise be counted among the rules.
#  - default = "lhs" states explicitly that all remaining items may appear
#    only in the antecedent.
rules <- apriori(
  trans,
  parameter = list(support = 0.02, confidence = 0.6, minlen = 2),
  appearance = list(
    default = "lhs",
    rhs = c(
      "hincfel=Comfortable",
      "hincfel=Coping",
      "hincfel=Difficult",
      "hincfel=Very_difficult"
    )
  ),
  control = list(verbose = FALSE)
)

# Number of rules that pass the thresholds.
length(rules)
## [1] 32
# Show up to ten rules with the highest lift. head() is used instead of
# [1:10] so the call still works when fewer than ten rules were found.
inspect(head(sort(rules, by = "lift"), n = 10))
## lhs rhs support confidence coverage lift count
## [1] {stflife=LifeSat_high,
## health=Health_good,
## eisced=Edu_high,
## mainact=Employed} => {hincfel=Comfortable} 0.5860612 0.6954887 0.8426610 1.060592 555
## [2] {happy=Happy_high,
## stflife=LifeSat_high,
## health=Health_good,
## eisced=Edu_high,
## mainact=Employed} => {hincfel=Comfortable} 0.5850053 0.6951066 0.8416051 1.060010 554
## [3] {stflife=LifeSat_high,
## eisced=Edu_high,
## mainact=Employed} => {hincfel=Comfortable} 0.5945090 0.6924969 0.8585005 1.056030 563
## [4] {happy=Happy_high,
## stflife=LifeSat_high,
## eisced=Edu_high,
## mainact=Employed} => {hincfel=Comfortable} 0.5934530 0.6921182 0.8574446 1.055452 562
## [5] {stflife=LifeSat_high,
## health=Health_good,
## eisced=Edu_high} => {hincfel=Comfortable} 0.5881732 0.6910670 0.8511088 1.053849 557
## [6] {happy=Happy_high,
## stflife=LifeSat_high,
## health=Health_good,
## eisced=Edu_high} => {hincfel=Comfortable} 0.5871172 0.6906832 0.8500528 1.053264 556
## [7] {stflife=LifeSat_high,
## eisced=Edu_high} => {hincfel=Comfortable} 0.5966209 0.6881851 0.8669483 1.049455 565
## [8] {happy=Happy_high,
## stflife=LifeSat_high,
## eisced=Edu_high} => {hincfel=Comfortable} 0.5955649 0.6878049 0.8658923 1.048875 564
## [9] {happy=Happy_high,
## health=Health_good,
## eisced=Edu_high,
## mainact=Employed} => {hincfel=Comfortable} 0.5860612 0.6868812 0.8532207 1.047466 555
## [10] {happy=Happy_high,
## eisced=Edu_high,
## mainact=Employed} => {hincfel=Comfortable} 0.5945090 0.6840826 0.8690602 1.043198 563
# Scatter plot of all rules: support vs. confidence, shaded by lift.
plot(rules, measure = c("support", "confidence"), shading = "lift")

# Interactive graph of up to ten highest-lift rules. head() replaces [1:10]
# so the plot does not fail when fewer than ten rules are available.
plot(
  head(sort(rules, by = "lift"), n = 10),
  method = "graph",
  engine = "htmlwidget"
)
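The extracted association rules point to a pattern between subjective well-being, education, labour market status, and perceived household income, but the pattern is modest. The ten rules with the highest lift all have hincfel=Comfortable as the consequent: respondents who combine high life satisfaction, good health, higher education and employment report living comfortably on their current income slightly more often than the sample as a whole (confidence of about 0.69, lift between roughly 1.04 and 1.06). Because these lift values are only marginally above 1, the associations are weak. None of the top rules involves low happiness, low life satisfaction, poor health or unemployment, so the output shown here does not by itself demonstrate that these characteristics are associated with finding it difficult or very difficult to live on current income. Overall, the results are consistent with income perception being linked to both economic position and subjective well-being, but they rest on the 947 respondents who remain after discretization, pooled across countries; no country-level comparison was carried out, so consistency across European countries remains to be tested.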