First simulate the product

library(MASS)  # for mvrnorm
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# Set seed
set.seed(456)

# Number of products to simulate
n_products <- 200

# Simulate health (discrete values from 1 to 5)
health <- sample(1:5, n_products, replace = TRUE)

# Simulate perceived_health with correlation 0.5 with health
# Step 1: Convert health to z-score for correlation purposes
z_health <- scale(health)

# Step 2: Generate correlated normal variable
rho <- 0.5
cov_matrix <- matrix(c(1, rho, rho, 1), nrow = 2)
z_vals <- mvrnorm(n_products, mu = c(0, 0), Sigma = cov_matrix)

# Step 3: Use second correlated variable to generate perceived_health
z_perceived_health <- z_vals[, 2]
perceived_health_continuous <- scale(z_perceived_health)  # normalize
# Rank and match to discrete values
perceived_health <- cut(perceived_health_continuous,
                        breaks = quantile(perceived_health_continuous, probs = seq(0, 1, length.out = 6)),
                        labels = 1:5, include.lowest = TRUE)
perceived_health <- as.integer(perceived_health)

# Simulate taste (1 or 2)
taste <- sample(1:2, n_products, replace = TRUE)

# Correlated perceived_taste
z_taste <- scale(taste)
z_vals_taste <- mvrnorm(n_products, mu = c(0, 0), Sigma = cov_matrix)
z_perceived_taste <- z_vals_taste[, 2]
perceived_taste <- cut(z_perceived_taste,
                       breaks = quantile(z_perceived_taste, probs = seq(0, 1, length.out = 3)),
                       labels = 1:2, include.lowest = TRUE)
perceived_taste <- as.integer(perceived_taste)

# Combine into data frame
products <- data.frame(
  health = health,
  perceived_health = perceived_health,
  taste = taste,
  perceived_taste = perceived_taste
)

# Remove duplicates
unique_products <- unique(products)

# View number of unique products and a sample
cat("Number of unique products:", nrow(unique_products), "\n")
## Number of unique products: 89
head(unique_products)
##   health perceived_health taste perceived_taste
## 1      5                2     1               1
## 2      5                2     1               2
## 3      3                4     1               2
## 5      4                5     1               1
## 6      3                4     2               2
## 7      1                2     1               1

Then simulate the user

# Assuming you already have:
# - unique_products: data.frame with product_id, health, perceived_health, taste, perceived_taste
# - n_individuals = 1000

set.seed(123)

# Simulate individual-level parameters
n_individuals <- 4000
alpha <- rnorm(n_individuals, mean = 0.5, sd = 0.2)
beta_1 <- rnorm(n_individuals, mean = 1.2, sd = 0.3)  # weight on perceived health
beta_2 <- rnorm(n_individuals, mean = 0.8, sd = 0.3)  # weight on perceived taste

individuals <- data.frame(
  id = 1:n_individuals,
  alpha = alpha,
  beta_1 = beta_1,
  beta_2 = beta_2
)

# Add product ID
unique_products$product_id <- 1:nrow(unique_products)

# All person-product combinations
full_data <- expand.grid(id = individuals$id, product_id = unique_products$product_id)

# Merge individual parameters
full_data <- full_data %>%
  left_join(individuals, by = "id") %>%
  left_join(unique_products, by = "product_id")

# Simulate epsilon for each choice situation
set.seed(456)
full_data$epsilon <- rnorm(nrow(full_data), 0, 1)

# Compute perceived utility (what drives choice)
full_data$perceived_utility <- with(full_data,
  alpha + beta_1 * perceived_health + beta_2 * perceived_taste + epsilon
)

full_data$utility <- with(full_data,
  alpha + beta_1 * health + beta_2 * taste + epsilon
)

# For each individual, pick the product with highest perceived utility
library(dplyr)

chosen <- full_data %>%
  group_by(id) %>%
  slice_max(perceived_utility, n = 1, with_ties = FALSE) %>%
  ungroup()

chosen_true <- full_data %>%
  group_by(id) %>%
  slice_max(utility, n = 1, with_ties = FALSE) %>%
  ungroup()

Warning label

We apply a warning label intervention that adjusts how individuals perceive a product’s healthiness. The updated perceived health value is stored as perceived_health_with_label.

The rules are as follows:

  1. If health ∈ {1, 2}, and perceived_health ∈ {3, 4, 5 }, perceived_health_with_label is set to the 2.
  2. If perceived_health ∈ {1, 2} but health ≥ 4, then perceived_health_with_label is updated to 3.

Now, people do not make inference no health and taste

# Apply the health warning logic
full_data <- full_data %>%
  mutate(
    perceived_health_with_wlabel = case_when(
      health %in% c(1, 2) & perceived_health %in% c(3, 4, 5) ~ 2,
      perceived_health %in% c(1, 2) & health >= 3 ~ 3,
      TRUE ~ perceived_health
    )
  )

full_data$utility_wlabel <- with(full_data,
  alpha + beta_1 * perceived_health_with_wlabel + beta_2 * perceived_taste + epsilon
)


chosen_wlabel <- full_data %>%
  group_by(id) %>%
  slice_max(utility_wlabel, n = 1, with_ties = FALSE) %>%
  ungroup()

Health Label

We apply a health label intervention that modifies how individuals perceive a product’s healthiness. The adjusted perception is stored in the variable perceived_health_with_hlabel.

The rules are as follows:

  1. If health ∈ {4, 5}, but perceived_health ∈ {1, 2,3}, then then perceived_health_with_hlabel is 4
  2. If perceived_health ∈ {4, 5} but health < 4, then perceived_health_with_hlabel is updated to 3.

Now, individuals do not make inferences about the healthiness or taste beyond what is given.

# Apply the health label logic
full_data <- full_data %>%
  mutate(
    perceived_health_with_hlabel = case_when(
      health %in% c(4, 5) & perceived_health %in% c(1, 2, 3) ~ 4,
      perceived_health %in% c(4, 5) & health < 4 ~ 3,
      TRUE ~ perceived_health
    )
  )

# Calculate utility based on perceived health after applying health label
full_data$utility_hlabel <- with(full_data,
  alpha + beta_1 * perceived_health_with_hlabel + beta_2 * perceived_taste + epsilon
)

# Choice: select product with the highest utility (under health label)
chosen_hlabel <- full_data %>%
  group_by(id) %>%
  slice_max(utility_hlabel, n = 1, with_ties = FALSE) %>%
  ungroup()

Nutrition score label

# Calculate utility based on perceived health after applying health label
full_data$utility_nulabel <- with(full_data,
  alpha + beta_1 * health + beta_2 * perceived_taste + epsilon
)

# Choice: select product with the highest utility (under health label)
chosen_nulabel <- full_data %>%
  group_by(id) %>%
  slice_max(utility_nulabel, n = 1, with_ties = FALSE) %>%
  ungroup()

Comparison

library(ggplot2)

# Sum of utilities across individuals in each scenario
consumer_utility_summary <- data.frame(
  scenario = c("True Utility", "No Label", "Warning Label", "Health Label", 'Nutrition-Score Label'),
  total_utility = c(
    sum(chosen_true$utility),
    sum(chosen$utility),
    sum(chosen_wlabel$utility),
    sum(chosen_hlabel$utility),
    sum(chosen_nulabel$utility)
  )
)
# Calculate increase relative to 'No Label' condition
baseline_utility <- consumer_utility_summary$total_utility[consumer_utility_summary$scenario == "No Label"]
consumer_utility_summary$utility_increase <- consumer_utility_summary$total_utility - baseline_utility
consumer_utility_summary$percent_increase <- 100 * (consumer_utility_summary$utility_increase / baseline_utility)
library(ggplot2)

ggplot(consumer_utility_summary, aes(x = scenario, y = total_utility)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_text(aes(label = ifelse(scenario == "No Label", "", 
                               paste0("+", round(percent_increase, 1), "%"))),
            vjust = -0.5, size = 5) +
  labs(
    title = "Total Consumer Utility Under Different Labeling Scenarios",
    x = "Scenario",
    y = "Total Utility"
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15)))+
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    axis.text.x = element_text(angle = 15, hjust = 1)
  )

library(dplyr)

# Calculate how many products were affected by the warning label intervention
warning_label_changes <- full_data %>%
  dplyr::select(product_id, health, perceived_health, perceived_health_with_wlabel) %>%
  distinct() %>%
  filter(perceived_health != perceived_health_with_wlabel) %>%
  nrow()

# Calculate how many products were affected by the health label intervention
health_label_changes <- full_data %>%
  dplyr::select(product_id, health, perceived_health, perceived_health_with_hlabel) %>%
  distinct() %>%
  filter(perceived_health != perceived_health_with_hlabel) %>%
  nrow()

# For nutrition score label, we don't modify perceived_health directly
# Instead, we use the true health value in the utility calculation
# But we can count how many products would have a different perception if we did this
nutrition_score_changes <- full_data %>%
  dplyr::select(product_id, health, perceived_health) %>%
  distinct() %>%
  filter(health != perceived_health) %>%
  nrow()

# Print the results
cat("Number of products with updated perceived health in Warning Label scenario:", warning_label_changes, "\n")
## Number of products with updated perceived health in Warning Label scenario: 40
cat("Number of products with updated perceived health in Health Label scenario:", health_label_changes, "\n")
## Number of products with updated perceived health in Health Label scenario: 42
cat("Number of products with different true vs perceived health (Nutrition Score scenario):", nutrition_score_changes, "\n")
## Number of products with different true vs perceived health (Nutrition Score scenario): 69
# We can also calculate the percentage of products affected in each scenario
total_products <- length(unique(full_data$product_id))
cat("\nPercentage of products affected:\n")
## 
## Percentage of products affected:
cat("Warning Label scenario:", round(warning_label_changes/total_products*100, 2), "%\n")
## Warning Label scenario: 44.94 %
cat("Health Label scenario:", round(health_label_changes/total_products*100, 2), "%\n")
## Health Label scenario: 47.19 %
cat("Nutrition Score scenario:", round(nutrition_score_changes/total_products*100, 2), "%\n")
## Nutrition Score scenario: 77.53 %
# We can also examine the specific changes in perceived health for each scenario
# Warning Label changes
warning_label_details <- full_data %>%
  dplyr::select(product_id, health, perceived_health, perceived_health_with_wlabel) %>%
  distinct() %>%
  filter(perceived_health != perceived_health_with_wlabel) %>%
  mutate(change_type = case_when(
    health %in% c(1, 2) & perceived_health %in% c(3, 4, 5) ~ "Unhealthy product perceived as healthy -> corrected to 2",
    perceived_health %in% c(1, 2) & health >= 3 ~ "Healthy product perceived as unhealthy -> updated to 3",
    TRUE ~ "Other"
  ))

# Count by change type for warning label
warning_label_counts <- warning_label_details %>%
  count(change_type) %>%
  arrange(desc(n))

# Health Label changes
health_label_details <- full_data %>%
  dplyr::select(product_id, health, perceived_health, perceived_health_with_hlabel) %>%
  distinct() %>%
  filter(perceived_health != perceived_health_with_hlabel) %>%
  mutate(change_type = case_when(
    health %in% c(4, 5) & perceived_health %in% c(1, 2, 3) ~ "Healthy product perceived as less healthy -> updated to 4",
    perceived_health %in% c(4, 5) & health < 4 ~ "Less healthy product perceived as healthy -> corrected to 3",
    TRUE ~ "Other"
  ))

# Count by change type for health label
health_label_counts <- health_label_details %>%
  count(change_type) %>%
  arrange(desc(n))

# Print detailed results
cat("\nWarning Label changes by type:\n")
## 
## Warning Label changes by type:
print(warning_label_counts)
##                                                change_type  n
## 1   Healthy product perceived as unhealthy -> updated to 3 20
## 2 Unhealthy product perceived as healthy -> corrected to 2 20
cat("\nHealth Label changes by type:\n")
## 
## Health Label changes by type:
print(health_label_counts)
##                                                   change_type  n
## 1 Less healthy product perceived as healthy -> corrected to 3 22
## 2   Healthy product perceived as less healthy -> updated to 4 20