R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see http://rmarkdown.rstudio.com.

When you click the Knit button, a document is generated that includes both the written content and the output of any R code chunks embedded in the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Load libraries
library(readr)      # for read_csv
library(dplyr)      # for data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)    # for string cleaning
library(tidyr)      # for splitting multi-select answers
library(ggplot2)    # for plotting

# Location of the cleaned survey export. The default is the original local
# path; set the SURVEY_CSV_PATH environment variable to run the analysis
# against a copy stored elsewhere without editing this file.
default_csv_path <- "C:/Users/Lenovo/Downloads/thesis/survey analysis/data/Survey_clean.csv"
csv_path <- Sys.getenv("SURVEY_CSV_PATH", unset = default_csv_path)
if (!nzchar(csv_path)) {
  # An empty override is treated the same as no override
  csv_path <- default_csv_path
}

# Fail early with an actionable message instead of a reader error further down
if (!file.exists(csv_path)) {
  stop(
    "Survey CSV not found at '", csv_path, "'. ",
    "Set the SURVEY_CSV_PATH environment variable to its location.",
    call. = FALSE
  )
}

# Attaching readr again is a no-op when it is already loaded; kept so this
# chunk can be run on its own
library(readr)

# Read the survey as UTF-8 so non-ASCII characters in the answers
# (for example the euro sign in the spend brackets) are decoded correctly
survey <- read_csv(csv_path, locale = locale(encoding = "UTF-8"))
## Rows: 97 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): dining_frequency, age_group, dining_companions, avg_spend_per_meal...
## dbl  (9): person, comfort_info_apps, comfort_communication_apps, comfort_ord...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Structural overview: one line per column with its type and first values
survey %>% glimpse()
## Rows: 97
## Columns: 21
## $ person                     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, …
## $ dining_frequency           <chr> "Several times per week", "Several times pe…
## $ age_group                  <chr> "25-34", "25-34", "25-34", "18-24", "18-24"…
## $ dining_companions          <chr> "Alone;With one friend or partner;With seve…
## $ avg_spend_per_meal         <chr> "Less than €10", "€10 - €20", "€10 - €20", …
## $ dining_types               <chr> "Fast food restaurants;Casual dining restau…
## $ comfort_info_apps          <dbl> 3, 5, 4, 4, 5, 5, 5, 4, 3, 5, 5, 5, 5, 5, 5…
## $ comfort_communication_apps <dbl> 3, 5, 4, 4, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5…
## $ comfort_ordering_apps      <dbl> 3, 5, 5, 4, 4, 3, 3, 5, 3, 5, 5, 5, 5, 3, 4…
## $ comfort_payment_apps       <dbl> 3, 4, 5, 3, 4, 4, 5, 5, 2, 2, 5, 5, 5, 4, 5…
## $ comfort_entertainment_apps <dbl> 4, 5, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 5, 5, 5…
## $ used_digital_ordering      <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "No", "Y…
## $ rating_digital_experience  <dbl> 4, 5, 4, 4, 4, NA, 5, 4, 4, 5, 5, 3, 3, NA,…
## $ benefits_digital_ordering  <chr> "Shorter waiting time;Easier payment proces…
## $ problems_digital_ordering  <chr> "No", "I could ask the waiter in person abo…
## $ usefulness_unified_app     <dbl> 4, 5, 5, 5, 5, 4, 5, 5, 4, 4, 3, 5, 5, 4, 5…
## $ importance_unified_app     <dbl> 2, 3, 3, 4, 4, 4, 5, 3, 2, 4, 3, 5, 4, 4, 4…
## $ more_likely_dine_out       <chr> "No", "Maybe", "Yes", "Yes", "Maybe", "Mayb…
## $ concerns_unified_app       <chr> "Data privacy;Technical problems;Less inter…
## $ willingness_unified_app    <chr> "Maybe", "Yes", "Yes", "Maybe", "Yes", "May…
## $ additional_comments        <chr> "It's data needs to be extremely well-encry…
survey %>% head(n = 5)  # first five respondents
## # A tibble: 5 × 21
##   person dining_frequency        age_group dining_companions  avg_spend_per_meal
##    <dbl> <chr>                   <chr>     <chr>              <chr>             
## 1      1 Several times per week  25-34     Alone;With one fr… Less than €10     
## 2      2 Several times per week  25-34     Alone;With one fr… €10 - €20         
## 3      3 Several times per month 25-34     With one friend o… €10 - €20         
## 4      4 Several times per month 18-24     With one friend o… €21 - €30         
## 5      5 Once per week           18-24     With one friend o… €10 - €20         
## # ℹ 16 more variables: dining_types <chr>, comfort_info_apps <dbl>,
## #   comfort_communication_apps <dbl>, comfort_ordering_apps <dbl>,
## #   comfort_payment_apps <dbl>, comfort_entertainment_apps <dbl>,
## #   used_digital_ordering <chr>, rating_digital_experience <dbl>,
## #   benefits_digital_ordering <chr>, problems_digital_ordering <chr>,
## #   usefulness_unified_app <dbl>, importance_unified_app <dbl>,
## #   more_likely_dine_out <chr>, concerns_unified_app <chr>, …
# Names of the five app-comfort rating items that feed the composite score
comfort_cols <- c(
  "comfort_info_apps", "comfort_communication_apps", "comfort_ordering_apps",
  "comfort_payment_apps", "comfort_entertainment_apps"
)

# Per-respondent composite: the mean of the comfort items, skipping any item
# that is missing for that respondent
survey <- mutate(
  survey,
  digital_comfort_score = survey %>%
    select(all_of(comfort_cols)) %>%
    rowMeans(na.rm = TRUE)
)

# Distribution of the new score
summary(survey[["digital_comfort_score"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.200   4.000   4.600   4.351   5.000   5.000
# Split respondents at a composite comfort score of 4: at or above is an
# "Early Adopter", below is a "Late Adopter". Stored as a factor with the
# early group first so tables and legends keep that order.
survey <- survey %>%
  mutate(
    adopter_segment = factor(
      if_else(digital_comfort_score >= 4, "Early Adopter", "Late Adopter"),
      levels = c("Early Adopter", "Late Adopter")
    )
  )

# Segment sizes
table(survey[["adopter_segment"]])
## 
## Early Adopter  Late Adopter 
##            76            21
library(tidyr)

# One row per (segment, selected benefit): the multi-select answer is a single
# ";"-separated string, so split it and strip whitespace around each option
benefits_long <- survey %>%
  select(adopter_segment, benefits_digital_ordering) %>%
  separate_rows(benefits_digital_ordering, sep = ";") %>%
  mutate(across(benefits_digital_ordering, trimws))

# Peek at the first ten expanded rows
benefits_long %>% head(n = 10)
## # A tibble: 10 × 2
##    adopter_segment benefits_digital_ordering         
##    <fct>           <chr>                             
##  1 Late Adopter    Shorter waiting time              
##  2 Late Adopter    Easier payment process            
##  3 Late Adopter    Less interaction needed with staff
##  4 Late Adopter    Better overview of the menu       
##  5 Late Adopter    Order accuracy                    
##  6 Early Adopter   Shorter waiting time              
##  7 Early Adopter   Easier payment process            
##  8 Early Adopter   Better overview of the menu       
##  9 Early Adopter   Shorter waiting time              
## 10 Early Adopter   Easier payment process
# Share of each benefit among all benefit selections made within a segment
# (the base is selections, not respondents). The result stays grouped by
# adopter_segment.
benefits_summary <- benefits_long %>%
  count(adopter_segment, benefits_digital_ordering, name = "n") %>%
  group_by(adopter_segment) %>%
  mutate(percent = n / sum(n) * 100)

benefits_summary
## # A tibble: 15 × 4
## # Groups:   adopter_segment [2]
##    adopter_segment benefits_digital_ordering                           n percent
##    <fct>           <chr>                                           <int>   <dbl>
##  1 Early Adopter   Better overview of the menu                        32  19.5  
##  2 Early Adopter   Didnt order through an app                          1   0.610
##  3 Early Adopter   Easier payment process                             38  23.2  
##  4 Early Adopter   Easily finding dietary and allergen information     6   3.66 
##  5 Early Adopter   Less interaction needed with staff                 26  15.9  
##  6 Early Adopter   N/a                                                 1   0.610
##  7 Early Adopter   No benefits                                         1   0.610
##  8 Early Adopter   Order accuracy                                     13   7.93 
##  9 Early Adopter   Shorter waiting time                               46  28.0  
## 10 Late Adopter    Better overview of the menu                        11  26.8  
## 11 Late Adopter    Easier payment process                              4   9.76 
## 12 Late Adopter    Easily finding dietary and allergen information     1   2.44 
## 13 Late Adopter    Less interaction needed with staff                  8  19.5  
## 14 Late Adopter    Order accuracy                                      3   7.32 
## 15 Late Adopter    Shorter waiting time                               14  34.1
library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)

# Predefined answer options of Q13; anything else in the column is free text
valid_benefits <- c(
  "Shorter waiting time",
  "Easier payment process",
  "Less interaction needed with staff",
  "Better overview of the menu",
  "Easily finding dietary and allergen information",
  "Order accuracy",
  "No benefits"  # kept on purpose: it is one of the predefined options
)

# Expand the ";"-separated multi-select answer to one row per selection.
# A respondent id is attached BEFORE splitting so that selections can be
# traced back to the person who made them.
benefits_long <- survey %>%
  mutate(respondent_id = row_number()) %>%
  separate_rows(benefits_digital_ordering, sep = ";") %>%
  mutate(benefits_digital_ordering = str_trim(benefits_digital_ordering))

# Keep only the predefined options (drops free-text and missing answers)
benefits_clean <- benefits_long %>%
  filter(benefits_digital_ordering %in% valid_benefits)

# Display order for the plot: options sorted by overall frequency, descending
benefits_order <- benefits_clean %>%
  count(benefits_digital_ordering, name = "total_count") %>%
  arrange(desc(total_count)) %>%
  pull(benefits_digital_ordering)

# Percentage base: respondents per segment who answered the question at all.
# Because respondents can tick several options, dividing by the number of
# selections (as was done before) yields a share of selections, not the
# share of respondents that the plot axis reports.
benefits_respondents <- benefits_long %>%
  filter(
    !is.na(benefits_digital_ordering),
    benefits_digital_ordering != ""
  ) %>%
  distinct(adopter_segment, respondent_id) %>%
  count(adopter_segment, name = "n_respondents")

# Per segment and option: number of respondents who selected it (each person
# counted once per option) and that number as a percentage of the segment's
# answering respondents. Percentages within a segment can sum to more than
# 100 because the question is multi-select.
benefits_summary <- benefits_clean %>%
  distinct(adopter_segment, respondent_id, benefits_digital_ordering) %>%
  count(adopter_segment, benefits_digital_ordering, name = "count") %>%
  left_join(benefits_respondents, by = "adopter_segment") %>%
  group_by(adopter_segment) %>%
  mutate(percentage = count / n_respondents * 100)

# Turn the option into a factor so the plot follows benefits_order
benefits_summary <- benefits_summary %>%
  mutate(
    benefits_digital_ordering = factor(
      benefits_digital_ordering,
      levels = benefits_order
    )
  )

# Two-colour palette, applied to the adopter segments in factor-level order
minimal_colors <- c("#FDBE85", "#74A9CF")

# Horizontal dodged bars: one bar per segment for every predefined benefit
benefits_summary %>%
  ggplot(
    mapping = aes(
      y = benefits_digital_ordering,
      x = percentage,
      fill = adopter_segment
    )
  ) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = minimal_colors) +
  labs(
    title = "Benefits of Digital Ordering",
    x = "Percentage of respondents",
    y = "Benefits",
    fill = "Segment"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "top",
    legend.text = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    # bars run horizontally, so only vertical major grid lines are kept
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  )

# Predefined answer options of Q18; anything else in the column is free text
valid_concerns <- c(
  "Data privacy",
  "Technical problems",
  "Less interaction with staff",
  "Too complicated to use",
  "No concerns"
)

# Expand the ";"-separated multi-select answer to one row per selection.
# A respondent id is attached BEFORE splitting so that selections can be
# traced back to the person who made them.
concerns_long <- survey %>%
  mutate(respondent_id = row_number()) %>%
  separate_rows(concerns_unified_app, sep = ";") %>%
  mutate(concerns_unified_app = str_trim(concerns_unified_app))

# Keep only the predefined options (drops free-text and missing answers)
concerns_clean <- concerns_long %>%
  filter(concerns_unified_app %in% valid_concerns)

# Display order for the plot: options sorted by overall frequency, descending
concerns_order <- concerns_clean %>%
  count(concerns_unified_app, name = "total_count") %>%
  arrange(desc(total_count)) %>%
  pull(concerns_unified_app)

# Percentage base: respondents per segment who answered the question at all.
# Because respondents can tick several options, dividing by the number of
# selections (as was done before) yields a share of selections, not the
# share of respondents that the plot axis reports.
concerns_respondents <- concerns_long %>%
  filter(
    !is.na(concerns_unified_app),
    concerns_unified_app != ""
  ) %>%
  distinct(adopter_segment, respondent_id) %>%
  count(adopter_segment, name = "n_respondents")

# Per segment and option: number of respondents who selected it (each person
# counted once per option) and that number as a percentage of the segment's
# answering respondents. Percentages within a segment can sum to more than
# 100 because the question is multi-select.
concerns_summary <- concerns_clean %>%
  distinct(adopter_segment, respondent_id, concerns_unified_app) %>%
  count(adopter_segment, concerns_unified_app, name = "count") %>%
  left_join(concerns_respondents, by = "adopter_segment") %>%
  group_by(adopter_segment) %>%
  mutate(percentage = count / n_respondents * 100)

# Turn the option into a factor so the plot follows concerns_order
concerns_summary <- concerns_summary %>%
  mutate(
    concerns_unified_app = factor(
      concerns_unified_app,
      levels = concerns_order
    )
  )

# Two-colour palette, applied to the adopter segments in factor-level order
minimal_colors <- c("#FDBE85", "#74A9CF")

# Horizontal dodged bars: one bar per segment for every predefined concern
concerns_summary %>%
  ggplot(
    mapping = aes(
      y = concerns_unified_app,
      x = percentage,
      fill = adopter_segment
    )
  ) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = minimal_colors) +
  labs(
    title = "Concerns about Using a Universal App",
    x = "Percentage of respondents",
    y = "Concerns",
    fill = "Segment"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "top",
    legend.text = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    # bars run horizontally, so only vertical major grid lines are kept
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  )

#' Plot the within-segment response distribution of one survey question
#'
#' Counts the rows of `data` per adopter segment and response value, converts
#' the counts to percentages within each segment, and draws them as dodged
#' bars (one bar per segment for every response value).
#'
#' Rows with a missing response are counted like any other value, so they are
#' part of each segment's 100%.
#'
#' @param data A data frame with an `adopter_segment` column and the column
#'   named by `column`.
#' @param column Name of the response column, given as a single string.
#' @param question_title Text used as the plot title.
#' @return A ggplot object.
plot_likert_by_adopter_percent <- function(data, column, question_title) {
  # Validate up front so a misspelled column name gives a direct error
  stopifnot(
    is.data.frame(data),
    is.character(column),
    length(column) == 1L,
    column %in% names(data),
    "adopter_segment" %in% names(data)
  )

  data %>%
    # `.data[[column]]` looks the column up by its string name
    group_by(adopter_segment, response = .data[[column]]) %>%
    summarise(count = n(), .groups = "drop") %>%
    group_by(adopter_segment) %>%
    mutate(percent = count / sum(count) * 100) %>%
    ungroup() %>%
    # Treat every response as a category: numeric Likert codes then get one
    # axis tick per observed answer instead of a continuous scale
    mutate(response = factor(response)) %>%
    ggplot(aes(x = response, y = percent, fill = adopter_segment)) +
    # preserve = "single" keeps bar widths equal when a segment has no
    # respondents for a given answer
    geom_col(position = position_dodge(preserve = "single")) +
    scale_fill_manual(values = c("#FDBE85", "#74A9CF")) +
    labs(
      title = question_title,
      x = "Response",
      y = "Percentage of Respondents",
      fill = "Segment"
    ) +
    theme_minimal(base_size = 13) +
    theme(
      axis.text.x = element_text(angle = 0, size = 11),
      legend.position = "top"
    )
}
# Q15: perceived usefulness, by adopter segment
plot_likert_by_adopter_percent(
  data = survey,
  column = "usefulness_unified_app",
  question_title = "Q15: Usefulness of a Unified App"
)

# Q16: perceived importance, by adopter segment
plot_likert_by_adopter_percent(
  data = survey,
  column = "importance_unified_app",
  question_title = "Q16: Importance of a Unified App"
)

# Q17: effect on dining-out likelihood, by adopter segment
plot_likert_by_adopter_percent(
  data = survey,
  column = "more_likely_dine_out",
  question_title = "Q17: Would a Unified App Make You Dine Out More?"
)

# Q19: willingness to pay, by adopter segment
plot_likert_by_adopter_percent(
  data = survey,
  column = "willingness_unified_app",
  question_title = "Q19: Willingness to Pay €1/month for a Unified App"
)