library(tidyverse)
# Fix the RNG seed so every run generates the same simulated values.
set.seed(42)

# Category levels the simulated columns are drawn from ----
cities <- c("Hà Nội", "TP.HCM", "Đà Nẵng", "Nha Trang", "Hội An")
aspects <- c("service", "room", "location", "cleanliness", "price")
sentiments <- c("Positive", "Neutral", "Negative")
languages <- c("Vietnamese", "English")

# Number of rows to simulate. Defined once so every column is guaranteed to
# have the same length; tibble() rejects columns of differing lengths.
n_records <- 1000L

# Simulated data set ----
# Every column is drawn independently of the others, with replacement.
# The draws are evaluated top to bottom, so reordering the columns changes
# the generated values even under the same seed.
hotel_data <- tibble(
  # Hotel identifiers, equally likely over 1..50.
  hotel_id = sample(1:50, n_records, replace = TRUE),
  # Weights are matched positionally to `cities`:
  # Hà Nội 0.3, TP.HCM 0.3, Đà Nẵng 0.2, Nha Trang 0.1, Hội An 0.1.
  city = sample(
    cities, n_records,
    replace = TRUE,
    prob = c(0.3, 0.3, 0.2, 0.1, 0.1)
  ),
  year = sample(2019:2023, n_records, replace = TRUE),
  # Uniform on [1, 5], rounded to one decimal place.
  rating = round(runif(n_records, 1, 5), 1),
  # Weights are matched positionally to `sentiments`:
  # Positive 0.55, Neutral 0.25, Negative 0.20.
  sentiment = sample(
    sentiments, n_records,
    replace = TRUE,
    prob = c(0.55, 0.25, 0.20)
  ),
  # Equal weight for every aspect.
  aspect = sample(aspects, n_records, replace = TRUE),
  # Weights are matched positionally to `languages`:
  # Vietnamese 0.65, English 0.35.
  review_language = sample(
    languages, n_records,
    replace = TRUE,
    prob = c(0.65, 0.35)
  )
)