set.seed(240426)
start_date <- as.Date("2016-01-01")
end_date <- as.Date("2026-04-27")
dates <- seq.Date(start_date, end_date, by = "day")
countries <- c("Germany", "France", "Italy", "Spain", "Poland", "Turkey", "UAE", "USA")
product_segments <- c("Standard", "Barrier", "High-Shrink", "Eco")
customer_segments <- c("Local", "Regional", "Global")
country_effect <- c(
Germany = 1.14,
France = 1.08,
Italy = 1.00,
Spain = 0.92,
Poland = 0.84,
Turkey = 0.78,
UAE = 1.07,
USA = 1.20
)
country_shock_sensitivity <- c(
Germany = 1.03,
France = 1.01,
Italy = 1.00,
Spain = 0.98,
Poland = 0.95,
Turkey = 0.93,
UAE = 1.05,
USA = 1.08
)
product_effect <- c(
Standard = 0.90,
Barrier = 1.10,
`High-Shrink` = 1.20,
Eco = 1.05
)
customer_effect <- c(
Local = 0.95,
Regional = 1.00,
Global = 1.07
)
rows_per_day <- sample(2:4, length(dates), replace = TRUE, prob = c(0.40, 0.40, 0.20))
Date <- rep(dates, rows_per_day)
n <- length(Date)
Country <- sample(countries, n, replace = TRUE, prob = c(0.15, 0.12, 0.10, 0.10, 0.12, 0.11, 0.10, 0.20))
Product_Segment <- sample(product_segments, n, replace = TRUE, prob = c(0.38, 0.24, 0.20, 0.18))
Customer_Segment <- sample(customer_segments, n, replace = TRUE, prob = c(0.46, 0.34, 0.20))
year_num <- as.integer(format(Date, "%Y"))
month_num <- as.integer(format(Date, "%m"))
base_price <- 145
inflation_factor <- 1.02^(as.numeric(Date - as.Date("2016-01-01")) / 365.25)
seasonality <- 1 + 0.018 * sin(2 * pi * month_num / 12) + ifelse(month_num %in% c(11, 12), 0.030, 0)
covid_window <- Date >= as.Date("2020-04-01") & Date <= as.Date("2022-12-31")
covid_shock <- rep(1, n)
covid_shock[covid_window] <- pmin(1.20, pmax(1.14, rnorm(sum(covid_window), mean = 1.165, sd = 0.014)))
iran_prewindow <- Date >= as.Date("2024-10-01") & Date < as.Date("2026-02-28")
iran_window <- Date >= as.Date("2026-02-28") & Date <= as.Date("2026-04-27")
freight_prebuildup <- rep(1, n)
if (sum(iran_prewindow) > 0) {
d <- as.numeric(Date[iran_prewindow] - as.Date("2024-10-01"))
freight_prebuildup[iran_prewindow] <- 1.05 + 0.18 * (d / max(d))
}
freight_conflict <- rep(1, n)
if (sum(iran_window) > 0) {
dd <- as.numeric(Date[iran_window] - as.Date("2026-02-28"))
freight_conflict[iran_window] <- 1.54 - 0.0022 * dd
}
freight_covid <- rep(1, n)
freight_covid[covid_window] <- pmin(1.16, pmax(1.09, rnorm(sum(covid_window), mean = 1.125, sd = 0.015)))
freight_noise <- rnorm(n, mean = 0, sd = 3.8)
Freight_Cost_Index <- 100 * (1 + 0.010 * (year_num - 2016)) *
(1 + 0.012 * sin(2 * pi * month_num / 12)) *
freight_covid * freight_prebuildup * freight_conflict + freight_noise
Freight_Cost_Index <- pmax(70, Freight_Cost_Index)
util_base <- 0.76 + 0.06 * sin(2 * pi * (month_num - 2) / 12)
util_noise <- rnorm(n, mean = 0, sd = 0.035)
util_noise[covid_window] <- rnorm(sum(covid_window), mean = 0, sd = 0.09)
Factory_Utilisation <- util_base + util_noise
Factory_Utilisation <- pmax(0.52, pmin(0.98, Factory_Utilisation))
country_mult <- country_effect[Country]
country_sensitivity <- country_shock_sensitivity[Country]
product_mult <- product_effect[Product_Segment]
customer_mult <- customer_effect[Customer_Segment]
global_multiplier <- ifelse(Customer_Segment == "Global", 1.30, ifelse(Customer_Segment == "Regional", 1.12, 1.00))
freight_pressure <- (Freight_Cost_Index - 100) / 100
event_price_impulse <- 1 +
ifelse(covid_window, 0.04 * country_sensitivity, 0) +
ifelse(iran_prewindow, 0.03 * country_sensitivity, 0) +
ifelse(iran_window, 0.12 * global_multiplier * country_sensitivity, 0)
util_pressure <- 1 + 0.90 * (Factory_Utilisation - 0.76)
noise <- rnorm(n, mean = 0, sd = 4.8)
Net_Price <- base_price * inflation_factor * seasonality * country_mult * product_mult * customer_mult *
(1 + 0.14 * freight_pressure * global_multiplier * country_sensitivity) * util_pressure * covid_shock * event_price_impulse + noise
pricing_data_v2 <- data.frame(
Date = Date,
Country = Country,
Product_Segment = Product_Segment,
Customer_Segment = Customer_Segment,
Factory_Utilisation = round(Factory_Utilisation, 3),
Freight_Cost_Index = round(Freight_Cost_Index, 2),
Net_Price = round(pmax(95, Net_Price), 2),
stringsAsFactors = FALSE
)
write.csv(pricing_data_v2, "pricing_data_v2.csv", row.names = FALSE)
if (requireNamespace("writexl", quietly = TRUE)) {
writexl::write_xlsx(pricing_data_v2, "pricing_data_v2.xlsx")
} else if (requireNamespace("openxlsx", quietly = TRUE)) {
openxlsx::write.xlsx(pricing_data_v2, "pricing_data_v2.xlsx", overwrite = TRUE)
}
save(pricing_data_v2, file = "pricing_data_v2.RData")
head(pricing_data_v2)