# Give the raw survey columns (c1, c3, ...) descriptive names.
# Each entry reads new_name = "old_name"; columns of `df_raw` that are not
# listed here keep their original names.
df <- df_raw |>
  rename(all_of(c(
    # c1-c10: timestamp and respondent attributes
    timestamp        = "c1",
    area_live        = "c3",
    area_work        = "c4",
    gender           = "c5",
    education        = "c6",
    employment       = "c7",
    marital_status   = "c9",
    household_size   = "c10",
    # c11-c16: visit, dining and spend items
    visit_frequency  = "c11",
    fav_soups        = "c12",
    dining_channel   = "c15",
    spend_raw        = "c16",
    # c17-c32: the sixteen imp_* rating items
    imp_taste        = "c17",
    imp_freshness    = "c18",
    imp_consistency  = "c19",
    imp_portions     = "c20",
    imp_hygiene      = "c21",
    imp_ambience     = "c22",
    imp_location     = "c23",
    imp_parking      = "c24",
    imp_speed        = "c25",
    imp_staff        = "c26",
    imp_delivery     = "c27",
    imp_online_ord   = "c28",
    imp_pricing      = "c29",
    imp_variety      = "c30",
    imp_takeaway     = "c31",
    imp_authentic    = "c32",
    # c33-c50: remaining items used later in the script
    dissatisfaction  = "c33",
    premium_willing  = "c34",
    pref_setting     = "c35",
    aware_dalos      = "c46",
    overall_exp      = "c47",
    food_quality_exp = "c48",
    relaunch_intent  = "c50"
  )))
# Convert free-text rating responses into integer codes 1-5.
#
# The input is coerced to character, transliterated to ASCII, trimmed and
# lower-cased before matching. Rules are listed in priority order: when a
# response matches several patterns, the one listed first determines the code.
# Responses matching no rule (including NA) yield NA_integer_.
#
# x: vector of responses (anything as.character() accepts).
# Returns an integer vector the same length as `x`.
encode_likert <- function(x) {
  cleaned <- iconv(as.character(x), to = "ASCII//TRANSLIT")
  cleaned <- str_to_lower(str_trim(cleaned))

  # Position in this vector is the code assigned (1..5).
  rules <- c(
    "not important",
    "slightly",
    "moderately",
    "^important",
    "extremely"
  )

  codes <- rep(NA_integer_, length(cleaned))
  # Apply lowest-priority rule first so earlier rules overwrite later ones,
  # which reproduces first-match-wins semantics.
  for (code in rev(seq_along(rules))) {
    hit <- str_detect(cleaned, rules[[code]])
    codes[!is.na(hit) & hit] <- code
  }
  codes
}
# Names of the sixteen imp_* columns that are recoded with encode_likert().
likert_cols <- paste0(
  "imp_",
  c(
    "taste", "freshness", "consistency", "portions",
    "hygiene", "ambience", "location", "parking",
    "speed", "staff", "delivery", "online_ord",
    "pricing", "variety", "takeaway", "authentic"
  )
)
# Derive the analysis variables on `df`.
#
#   imp_* columns   recoded in place to integer codes via encode_likert()
#   spend_num       1-5 code parsed from spend_raw (median-imputed, see end)
#   spend_label     factor label for the parsed spend code
#   visit_num       1-5 code parsed from visit_frequency (median-imputed)
#   freq_label      factor label for the parsed visit code
#   edu_group       factor grouping of education
#   emp_group       character grouping of employment
#   intent_binary   1/0 flag from relaunch_intent
#   premium_binary  1/0 flag from premium_willing
#
# All text matching is first-match-wins (case_when), so the order of the
# rules inside each mapping matters and must not be rearranged.
df <- df |>
  mutate(across(all_of(likert_cols), encode_likert)) |>
  mutate(
    # Spend band code; responses matching no rule become NA here.
    spend_num = dplyr::case_when(
      str_detect(as.character(spend_raw), "(?i)below|elow") ~ 1L,
      str_detect(as.character(spend_raw), "1.?500|1500") ~ 2L,
      str_detect(as.character(spend_raw), "3.?001|3001") ~ 3L,
      str_detect(as.character(spend_raw), "5.?001|5001") ~ 4L,
      str_detect(as.character(spend_raw), "(?i)above|bove|8.?000") ~ 5L,
      TRUE ~ NA_integer_
    ),
    # Built from the un-imputed spend_num, so unmatched rows keep an NA label.
    spend_label = factor(
      dplyr::case_when(
        spend_num == 1L ~ "Below N1,500",
        spend_num == 2L ~ "N1,500-3,000",
        spend_num == 3L ~ "N3,001-5,000",
        spend_num == 4L ~ "N5,001-8,000",
        spend_num == 5L ~ "Above N8,000",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Below N1,500", "N1,500-3,000", "N3,001-5,000",
        "N5,001-8,000", "Above N8,000"
      )
    ),
    # Visit frequency code. The "less" rule precedes "month", so a response
    # containing both words is coded 1 rather than 2.
    visit_num = dplyr::case_when(
      str_detect(as.character(visit_frequency), "(?i)less") ~ 1L,
      str_detect(as.character(visit_frequency), "(?i)month") ~ 2L,
      str_detect(
        as.character(visit_frequency), "(?i)1.2.*week|1.*2.*week"
      ) ~ 3L,
      str_detect(as.character(visit_frequency), "(?i)3.4|3.*4") ~ 4L,
      str_detect(as.character(visit_frequency), "(?i)daily") ~ 5L,
      TRUE ~ NA_integer_
    ),
    # Built from the un-imputed visit_num, so unmatched rows keep an NA label.
    freq_label = factor(
      dplyr::case_when(
        visit_num == 1L ~ "< Once/month",
        visit_num == 2L ~ "1-2x/month",
        visit_num == 3L ~ "1-2x/week",
        visit_num == 4L ~ "3-4x/week",
        visit_num == 5L ~ "Daily",
        TRUE ~ NA_character_
      ),
      levels = c(
        "< Once/month", "1-2x/month", "1-2x/week", "3-4x/week", "Daily"
      )
    ),
    # Education grouping. Unmatched (and missing) responses fall through to
    # "Other". "Other" must be listed in `levels`: factor() converts any
    # value absent from `levels` to NA, which previously discarded every
    # "Other" row.
    edu_group = factor(
      dplyr::case_when(
        str_detect(
          as.character(education), "(?i)secondary|waec|neco|ond"
        ) ~ "Secondary/OND",
        str_detect(as.character(education), "(?i)hnd") ~ "HND",
        str_detect(
          as.character(education), "(?i)bachelor|b\\.sc|b\\.a"
        ) ~ "Bachelor's",
        str_detect(
          as.character(education), "(?i)postgrad|mba|m\\.sc|ph"
        ) ~ "Postgraduate",
        str_detect(
          as.character(education), "(?i)professional|cert"
        ) ~ "Professional Cert",
        TRUE ~ "Other"
      ),
      levels = c(
        "Secondary/OND", "HND", "Bachelor's", "Postgraduate",
        "Professional Cert", "Other"
      )
    ),
    # Employment grouping (plain character, not a factor). Unmatched and
    # missing responses are labelled "Other".
    emp_group = dplyr::case_when(
      str_detect(as.character(employment), "(?i)private") ~ "Private sector",
      str_detect(as.character(employment), "(?i)self|business") ~
        "Self-employed",
      str_detect(as.character(employment), "(?i)student") ~ "Student",
      str_detect(as.character(employment), "(?i)unemploy") ~ "Unemployed",
      str_detect(as.character(employment), "(?i)public|gov|church") ~
        "Public/Other",
      TRUE ~ "Other"
    ),
    # Binary flags: 1 when the response text contains one of the listed
    # phrases, 0 otherwise; a missing response stays NA (if_else propagates
    # an NA condition).
    intent_binary = if_else(
      str_detect(
        as.character(relaunch_intent), "(?i)very likely|extremely likely"
      ),
      1L, 0L
    ),
    premium_binary = if_else(
      str_detect(
        as.character(premium_willing), "(?i)definitely yes|probably yes"
      ),
      1L, 0L
    ),
    # Median imputation for codes that could not be parsed. as.integer()
    # truncates a fractional median (e.g. 2.5 -> 2). This step runs after
    # spend_label / freq_label were computed, so imputed rows have a numeric
    # code but an NA label.
    spend_num = if_else(
      is.na(spend_num),
      as.integer(median(spend_num, na.rm = TRUE)),
      spend_num
    ),
    visit_num = if_else(
      is.na(visit_num),
      as.integer(median(visit_num, na.rm = TRUE)),
      visit_num
    )
  )
# Report how many rows the cleaned data frame contains.
cat("Clean dataset: ", nrow(df), " rows\n", sep = "")