Creating the Dataset
The ESS asks many questions about people's lives. I selected variables
that might relate to how people feel about their household income:
# Fix the RNG seed so the simulated sample is reproducible.
set.seed(42)
n <- 2000 # sample size

# Simulated ESS-like variables: the answer categories of each variable and
# the weights used to draw them. `prob = NULL` means every category is
# equally likely. Columns are drawn in the order they are listed here.
variable_specs <- list(
  # Country (26 countries)
  country = list(
    levels = c("Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus",
               "Czechia", "Estonia", "Finland", "France", "Germany",
               "Greece", "Hungary", "Iceland", "Ireland", "Italy",
               "Lithuania", "Netherlands", "Norway", "Poland", "Portugal",
               "Slovakia", "Slovenia", "Spain", "Sweden", "Switzerland",
               "UK"),
    prob = NULL
  ),
  # Age groups
  age_group = list(
    levels = c("18-30", "31-45", "46-60", "61+"),
    prob = c(0.2, 0.3, 0.3, 0.2)
  ),
  # Education level
  education = list(
    levels = c("Low", "Medium", "High"),
    prob = c(0.25, 0.45, 0.30)
  ),
  # Employment status
  employment = list(
    levels = c("Employed", "Unemployed", "Retired", "Student", "Other"),
    prob = c(0.50, 0.08, 0.22, 0.10, 0.10)
  ),
  # Health status
  health = list(
    levels = c("Very_Good", "Good", "Fair", "Bad"),
    prob = c(0.25, 0.40, 0.25, 0.10)
  ),
  # Living area
  area = list(
    levels = c("Big_City", "Suburbs", "Town", "Village"),
    prob = c(0.25, 0.25, 0.30, 0.20)
  ),
  # Household size
  hh_size = list(
    levels = c("1_person", "2_people", "3_people", "4+_people"),
    prob = c(0.25, 0.35, 0.20, 0.20)
  ),
  # Life satisfaction (1-10 scale converted to categories)
  life_sat = list(
    levels = c("Low_Satisfaction", "Medium_Satisfaction",
               "High_Satisfaction"),
    prob = c(0.20, 0.45, 0.35)
  ),
  # Social meetings frequency
  social = list(
    levels = c("Rarely", "Sometimes", "Often"),
    prob = c(0.25, 0.45, 0.30)
  ),
  # Trust in people
  trust = list(
    levels = c("Low_Trust", "Medium_Trust", "High_Trust"),
    prob = c(0.30, 0.40, 0.30)
  )
)

# Draw `n` answers with replacement for each variable and bind the
# resulting vectors into one data frame (one column per variable).
ess_data <- do.call(
  data.frame,
  lapply(variable_specs, function(spec) {
    sample(spec$levels, n, replace = TRUE, prob = spec$prob)
  })
)
# Now create hincfel with realistic associations
# People with certain characteristics more likely to feel comfortable
#
# The whole column is computed at once instead of row by row: each
# adjustment is applied to the full probability vector, with a logical
# mask acting as a 0/1 multiplier. Adjustments are added in the same
# order as before (education, employment, health, life satisfaction,
# country), and one uniform number is drawn per respondent in row order.
prob_comfortable <- rep(0.25, nrow(ess_data)) # base probability

# education effect
prob_comfortable <- prob_comfortable +
  0.20 * (ess_data$education == "High") -
  0.10 * (ess_data$education == "Low")

# employment effect
prob_comfortable <- prob_comfortable +
  0.15 * (ess_data$employment == "Employed") -
  0.20 * (ess_data$employment == "Unemployed")

# health effect
prob_comfortable <- prob_comfortable +
  0.10 * (ess_data$health == "Very_Good") -
  0.15 * (ess_data$health == "Bad")

# life satisfaction effect
prob_comfortable <- prob_comfortable +
  0.15 * (ess_data$life_sat == "High_Satisfaction") -
  0.15 * (ess_data$life_sat == "Low_Satisfaction")

# country effect (Nordic countries and Switzerland more comfortable)
# NOTE(review): "Denmark" is not among the simulated countries, so it
# matches no rows at present; kept so the rule still applies if it is added.
prob_comfortable <- prob_comfortable +
  0.15 * (ess_data$country %in%
            c("Norway", "Sweden", "Finland", "Denmark", "Switzerland")) -
  0.10 * (ess_data$country %in% c("Bulgaria", "Greece", "Hungary"))

# keep probability between 0.05 and 0.95
prob_comfortable <- pmax(0.05, pmin(0.95, prob_comfortable))

# assign hincfel category
# Cumulative cut-points on [0, 1]: "Living_Comfortably" gets a share of
# 0.4 * prob_comfortable, "Coping" the next 0.35, "Difficult" the next
# 0.20, and "Very_Difficult" whatever remains.
rand <- runif(nrow(ess_data))
cut_comfortable <- prob_comfortable * 0.4
cut_coping <- cut_comfortable + 0.35
cut_difficult <- cut_coping + 0.20

# The cut-points are increasing, so the number of cut-points a draw has
# reached (0 to 3) selects the category directly.
hincfel_labels <- c("Living_Comfortably", "Coping", "Difficult",
                    "Very_Difficult")
hincfel_index <- 1L +
  (rand >= cut_comfortable) +
  (rand >= cut_coping) +
  (rand >= cut_difficult)
ess_data$hincfel <- hincfel_labels[hincfel_index]
# Recode every column of ess_data as a factor, then rebuild the data frame
# from the converted columns.
factor_columns <- lapply(ess_data, as.factor)
ess_data <- as.data.frame(factor_columns)