library(MASS) # for mvrnorm
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Set seed
set.seed(456)
# Number of products to simulate
n_products <- 200
# Simulate health (discrete values from 1 to 5)
health <- sample(1:5, n_products, replace = TRUE)
# Simulate perceived_health with correlation 0.5 with health
# Step 1: Convert health to z-score for correlation purposes
z_health <- scale(health)
# Step 2: Generate correlated normal variable
rho <- 0.5
cov_matrix <- matrix(c(1, rho, rho, 1), nrow = 2)
z_vals <- mvrnorm(n_products, mu = c(0, 0), Sigma = cov_matrix)
# Step 3: Use second correlated variable to generate perceived_health
z_perceived_health <- z_vals[, 2]
perceived_health_continuous <- scale(z_perceived_health) # normalize
# Rank and match to discrete values
perceived_health <- cut(perceived_health_continuous,
breaks = quantile(perceived_health_continuous, probs = seq(0, 1, length.out = 6)),
labels = 1:5, include.lowest = TRUE)
perceived_health <- as.integer(perceived_health)
# Simulate taste (1 or 2)
taste <- sample(1:2, n_products, replace = TRUE)
# Correlated perceived_taste
z_taste <- scale(taste)
z_vals_taste <- mvrnorm(n_products, mu = c(0, 0), Sigma = cov_matrix)
z_perceived_taste <- z_vals_taste[, 2]
perceived_taste <- cut(z_perceived_taste,
breaks = quantile(z_perceived_taste, probs = seq(0, 1, length.out = 3)),
labels = 1:2, include.lowest = TRUE)
perceived_taste <- as.integer(perceived_taste)
# Combine into data frame
products <- data.frame(
health = health,
perceived_health = perceived_health,
taste = taste,
perceived_taste = perceived_taste
)
# Remove duplicates
unique_products <- unique(products)
# View number of unique products and a sample
cat("Number of unique products:", nrow(unique_products), "\n")
## Number of unique products: 89
head(unique_products)
## health perceived_health taste perceived_taste
## 1 5 2 1 1
## 2 5 2 1 2
## 3 3 4 1 2
## 5 4 5 1 1
## 6 3 4 2 2
## 7 1 2 1 1
# Assuming you already have:
# - unique_products: data.frame with product_id, health, perceived_health, taste, perceived_taste
# - n_individuals = 1000
set.seed(123)
# Simulate individual-level parameters
n_individuals <- 4000
alpha <- rnorm(n_individuals, mean = 0.5, sd = 0.2)
beta_1 <- rnorm(n_individuals, mean = 1.2, sd = 0.3) # weight on perceived health
beta_2 <- rnorm(n_individuals, mean = 0.8, sd = 0.3) # weight on perceived taste
individuals <- data.frame(
id = 1:n_individuals,
alpha = alpha,
beta_1 = beta_1,
beta_2 = beta_2
)
# Add product ID
unique_products$product_id <- 1:nrow(unique_products)
# All person-product combinations
full_data <- expand.grid(id = individuals$id, product_id = unique_products$product_id)
# Merge individual parameters
full_data <- full_data %>%
left_join(individuals, by = "id") %>%
left_join(unique_products, by = "product_id")
# Simulate epsilon for each choice situation
set.seed(456)
full_data$epsilon <- rnorm(nrow(full_data), 0, 1)
# Compute perceived utility (what drives choice)
full_data$perceived_utility <- with(full_data,
alpha + beta_1 * perceived_health + beta_2 * perceived_taste + epsilon
)
full_data$utility <- with(full_data,
alpha + beta_1 * health + beta_2 * taste + epsilon
)
# For each individual, pick the product with highest perceived utility
library(dplyr)
chosen <- full_data %>%
group_by(id) %>%
slice_max(perceived_utility, n = 1, with_ties = FALSE) %>%
ungroup()
chosen_true <- full_data %>%
group_by(id) %>%
slice_max(utility, n = 1, with_ties = FALSE) %>%
ungroup()
We apply a warning label intervention that adjusts how individuals
perceive a product’s healthiness. The updated perceived health value is
stored as perceived_health_with_label
.
The rules are as follows:
health
∈ {1, 2}, and perceived_health
∈
{3, 4, 5 }, perceived_health_with_label
is set to
the 2.perceived_health
∈ {1, 2} but
health
≥ 4, then perceived_health_with_label
is updated to 3.Now, people do not make inference no health and taste
# Apply the health warning logic
full_data <- full_data %>%
mutate(
perceived_health_with_wlabel = case_when(
health %in% c(1, 2) & perceived_health %in% c(3, 4, 5) ~ 2,
perceived_health %in% c(1, 2) & health >= 3 ~ 3,
TRUE ~ perceived_health
)
)
full_data$utility_wlabel <- with(full_data,
alpha + beta_1 * perceived_health_with_wlabel + beta_2 * perceived_taste + epsilon
)
chosen_wlabel <- full_data %>%
group_by(id) %>%
slice_max(utility_wlabel, n = 1, with_ties = FALSE) %>%
ungroup()
We apply a health label intervention that modifies
how individuals perceive a product’s healthiness. The adjusted
perception is stored in the variable
perceived_health_with_hlabel
.
The rules are as follows:
health
∈ {4, 5}, but perceived_health
∈
{1, 2,3}, then then perceived_health_with_hlabel
is 4perceived_health
∈ {4, 5} but
health
< 4, then
perceived_health_with_hlabel
is updated to
3.Now, individuals do not make inferences about the healthiness or taste beyond what is given.
# Apply the health label logic
full_data <- full_data %>%
mutate(
perceived_health_with_hlabel = case_when(
health %in% c(4, 5) & perceived_health %in% c(1, 2, 3) ~ 4,
perceived_health %in% c(4, 5) & health < 4 ~ 3,
TRUE ~ perceived_health
)
)
# Calculate utility based on perceived health after applying health label
full_data$utility_hlabel <- with(full_data,
alpha + beta_1 * perceived_health_with_hlabel + beta_2 * perceived_taste + epsilon
)
# Choice: select product with the highest utility (under health label)
chosen_hlabel <- full_data %>%
group_by(id) %>%
slice_max(utility_hlabel, n = 1, with_ties = FALSE) %>%
ungroup()
# Calculate utility based on perceived health after applying health label
full_data$utility_nulabel <- with(full_data,
alpha + beta_1 * health + beta_2 * perceived_taste + epsilon
)
# Choice: select product with the highest utility (under health label)
chosen_nulabel <- full_data %>%
group_by(id) %>%
slice_max(utility_nulabel, n = 1, with_ties = FALSE) %>%
ungroup()
library(ggplot2)
# Sum of utilities across individuals in each scenario
consumer_utility_summary <- data.frame(
scenario = c("True Utility", "No Label", "Warning Label", "Health Label", 'Nutrition-Score Label'),
total_utility = c(
sum(chosen_true$utility),
sum(chosen$utility),
sum(chosen_wlabel$utility),
sum(chosen_hlabel$utility),
sum(chosen_nulabel$utility)
)
)
# Calculate increase relative to 'No Label' condition
baseline_utility <- consumer_utility_summary$total_utility[consumer_utility_summary$scenario == "No Label"]
consumer_utility_summary$utility_increase <- consumer_utility_summary$total_utility - baseline_utility
consumer_utility_summary$percent_increase <- 100 * (consumer_utility_summary$utility_increase / baseline_utility)
library(ggplot2)
ggplot(consumer_utility_summary, aes(x = scenario, y = total_utility)) +
geom_bar(stat = "identity", fill = "steelblue") +
geom_text(aes(label = ifelse(scenario == "No Label", "",
paste0("+", round(percent_increase, 1), "%"))),
vjust = -0.5, size = 5) +
labs(
title = "Total Consumer Utility Under Different Labeling Scenarios",
x = "Scenario",
y = "Total Utility"
) +
scale_y_continuous(expand = expansion(mult = c(0, 0.15)))+
theme_minimal() +
theme(
text = element_text(size = 14),
axis.text.x = element_text(angle = 15, hjust = 1)
)
library(dplyr)
# Calculate how many products were affected by the warning label intervention
warning_label_changes <- full_data %>%
dplyr::select(product_id, health, perceived_health, perceived_health_with_wlabel) %>%
distinct() %>%
filter(perceived_health != perceived_health_with_wlabel) %>%
nrow()
# Calculate how many products were affected by the health label intervention
health_label_changes <- full_data %>%
dplyr::select(product_id, health, perceived_health, perceived_health_with_hlabel) %>%
distinct() %>%
filter(perceived_health != perceived_health_with_hlabel) %>%
nrow()
# For nutrition score label, we don't modify perceived_health directly
# Instead, we use the true health value in the utility calculation
# But we can count how many products would have a different perception if we did this
nutrition_score_changes <- full_data %>%
dplyr::select(product_id, health, perceived_health) %>%
distinct() %>%
filter(health != perceived_health) %>%
nrow()
# Print the results
cat("Number of products with updated perceived health in Warning Label scenario:", warning_label_changes, "\n")
## Number of products with updated perceived health in Warning Label scenario: 40
cat("Number of products with updated perceived health in Health Label scenario:", health_label_changes, "\n")
## Number of products with updated perceived health in Health Label scenario: 42
cat("Number of products with different true vs perceived health (Nutrition Score scenario):", nutrition_score_changes, "\n")
## Number of products with different true vs perceived health (Nutrition Score scenario): 69
# We can also calculate the percentage of products affected in each scenario
total_products <- length(unique(full_data$product_id))
cat("\nPercentage of products affected:\n")
##
## Percentage of products affected:
cat("Warning Label scenario:", round(warning_label_changes/total_products*100, 2), "%\n")
## Warning Label scenario: 44.94 %
cat("Health Label scenario:", round(health_label_changes/total_products*100, 2), "%\n")
## Health Label scenario: 47.19 %
cat("Nutrition Score scenario:", round(nutrition_score_changes/total_products*100, 2), "%\n")
## Nutrition Score scenario: 77.53 %
# We can also examine the specific changes in perceived health for each scenario
# Warning Label changes
warning_label_details <- full_data %>%
dplyr::select(product_id, health, perceived_health, perceived_health_with_wlabel) %>%
distinct() %>%
filter(perceived_health != perceived_health_with_wlabel) %>%
mutate(change_type = case_when(
health %in% c(1, 2) & perceived_health %in% c(3, 4, 5) ~ "Unhealthy product perceived as healthy -> corrected to 2",
perceived_health %in% c(1, 2) & health >= 3 ~ "Healthy product perceived as unhealthy -> updated to 3",
TRUE ~ "Other"
))
# Count by change type for warning label
warning_label_counts <- warning_label_details %>%
count(change_type) %>%
arrange(desc(n))
# Health Label changes
health_label_details <- full_data %>%
dplyr::select(product_id, health, perceived_health, perceived_health_with_hlabel) %>%
distinct() %>%
filter(perceived_health != perceived_health_with_hlabel) %>%
mutate(change_type = case_when(
health %in% c(4, 5) & perceived_health %in% c(1, 2, 3) ~ "Healthy product perceived as less healthy -> updated to 4",
perceived_health %in% c(4, 5) & health < 4 ~ "Less healthy product perceived as healthy -> corrected to 3",
TRUE ~ "Other"
))
# Count by change type for health label
health_label_counts <- health_label_details %>%
count(change_type) %>%
arrange(desc(n))
# Print detailed results
cat("\nWarning Label changes by type:\n")
##
## Warning Label changes by type:
print(warning_label_counts)
## change_type n
## 1 Healthy product perceived as unhealthy -> updated to 3 20
## 2 Unhealthy product perceived as healthy -> corrected to 2 20
cat("\nHealth Label changes by type:\n")
##
## Health Label changes by type:
print(health_label_counts)
## change_type n
## 1 Less healthy product perceived as healthy -> corrected to 3 22
## 2 Healthy product perceived as less healthy -> updated to 4 20