This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Example chunk: print summary statistics for the `cars` data set
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# Load libraries
library(readr) # for read_csv
library(dplyr) # for data manipulation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr) # for string cleaning
library(tidyr) # for splitting multi-select answers
library(ggplot2) # for plotting
# readr supplies read_csv() and locale()
library(readr)
# ✅ Absolute Windows path to the cleaned survey CSV
csv_path <- "C:/Users/Lenovo/Downloads/thesis/survey analysis/data/Survey_clean.csv"
# Parse the CSV, decoding the file explicitly as UTF-8
survey <- read_csv(
  file = csv_path,
  locale = locale(encoding = "UTF-8")
)
## Rows: 97 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): dining_frequency, age_group, dining_companions, avg_spend_per_meal...
## dbl (9): person, comfort_info_apps, comfort_communication_apps, comfort_ord...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Quick check: list every column with its type and first values
glimpse(survey)
## Rows: 97
## Columns: 21
## $ person <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, …
## $ dining_frequency <chr> "Several times per week", "Several times pe…
## $ age_group <chr> "25-34", "25-34", "25-34", "18-24", "18-24"…
## $ dining_companions <chr> "Alone;With one friend or partner;With seve…
## $ avg_spend_per_meal <chr> "Less than €10", "€10 - €20", "€10 - €20", …
## $ dining_types <chr> "Fast food restaurants;Casual dining restau…
## $ comfort_info_apps <dbl> 3, 5, 4, 4, 5, 5, 5, 4, 3, 5, 5, 5, 5, 5, 5…
## $ comfort_communication_apps <dbl> 3, 5, 4, 4, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5…
## $ comfort_ordering_apps <dbl> 3, 5, 5, 4, 4, 3, 3, 5, 3, 5, 5, 5, 5, 3, 4…
## $ comfort_payment_apps <dbl> 3, 4, 5, 3, 4, 4, 5, 5, 2, 2, 5, 5, 5, 4, 5…
## $ comfort_entertainment_apps <dbl> 4, 5, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 5, 5, 5…
## $ used_digital_ordering <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "No", "Y…
## $ rating_digital_experience <dbl> 4, 5, 4, 4, 4, NA, 5, 4, 4, 5, 5, 3, 3, NA,…
## $ benefits_digital_ordering <chr> "Shorter waiting time;Easier payment proces…
## $ problems_digital_ordering <chr> "No", "I could ask the waiter in person abo…
## $ usefulness_unified_app <dbl> 4, 5, 5, 5, 5, 4, 5, 5, 4, 4, 3, 5, 5, 4, 5…
## $ importance_unified_app <dbl> 2, 3, 3, 4, 4, 4, 5, 3, 2, 4, 3, 5, 4, 4, 4…
## $ more_likely_dine_out <chr> "No", "Maybe", "Yes", "Yes", "Maybe", "Mayb…
## $ concerns_unified_app <chr> "Data privacy;Technical problems;Less inter…
## $ willingness_unified_app <chr> "Maybe", "Yes", "Yes", "Maybe", "Yes", "May…
## $ additional_comments <chr> "It's data needs to be extremely well-encry…
# Preview the first five rows of the survey data
head(survey, 5)
## # A tibble: 5 × 21
## person dining_frequency age_group dining_companions avg_spend_per_meal
## <dbl> <chr> <chr> <chr> <chr>
## 1 1 Several times per week 25-34 Alone;With one fr… Less than €10
## 2 2 Several times per week 25-34 Alone;With one fr… €10 - €20
## 3 3 Several times per month 25-34 With one friend o… €10 - €20
## 4 4 Several times per month 18-24 With one friend o… €21 - €30
## 5 5 Once per week 18-24 With one friend o… €10 - €20
## # ℹ 16 more variables: dining_types <chr>, comfort_info_apps <dbl>,
## # comfort_communication_apps <dbl>, comfort_ordering_apps <dbl>,
## # comfort_payment_apps <dbl>, comfort_entertainment_apps <dbl>,
## # used_digital_ordering <chr>, rating_digital_experience <dbl>,
## # benefits_digital_ordering <chr>, problems_digital_ordering <chr>,
## # usefulness_unified_app <dbl>, importance_unified_app <dbl>,
## # more_likely_dine_out <chr>, concerns_unified_app <chr>, …
# Names of the five columns holding the app-comfort ratings
comfort_cols <- c(
  "comfort_info_apps",
  "comfort_communication_apps",
  "comfort_ordering_apps",
  "comfort_payment_apps",
  "comfort_entertainment_apps"
)
# Per-respondent digital comfort score: the row-wise mean of the comfort
# ratings, ignoring missing ratings (na.rm = TRUE)
survey <- mutate(
  survey,
  digital_comfort_score = rowMeans(across(all_of(comfort_cols)), na.rm = TRUE)
)
# Quick check of the score distribution
summary(survey[["digital_comfort_score"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.200 4.000 4.600 4.351 5.000 5.000
# Split respondents into two segments by digital comfort score:
# a score of 4 or more is an "Early Adopter", anything lower a "Late Adopter".
# The result is stored as a factor so "Early Adopter" always sorts first.
survey <- survey %>%
  mutate(
    adopter_segment = factor(
      ifelse(digital_comfort_score >= 4, "Early Adopter", "Late Adopter"),
      levels = c("Early Adopter", "Late Adopter")
    )
  )
# Check how many respondents fall into each segment
table(survey[["adopter_segment"]])
##
## Early Adopter Late Adopter
## 76 21
library(tidyr)
# Multi-select answers are stored as one ";"-separated string per respondent.
# Keep only the segment and the answer, then put every selected option on
# its own row.
benefits_long <- separate_rows(
  select(survey, adopter_segment, benefits_digital_ordering),
  benefits_digital_ordering,
  sep = ";"
)
# Strip the whitespace left around each option by the split
benefits_long$benefits_digital_ordering <-
  trimws(benefits_long$benefits_digital_ordering)
# Quick check of the first ten rows
head(benefits_long, n = 10)
## # A tibble: 10 × 2
## adopter_segment benefits_digital_ordering
## <fct> <chr>
## 1 Late Adopter Shorter waiting time
## 2 Late Adopter Easier payment process
## 3 Late Adopter Less interaction needed with staff
## 4 Late Adopter Better overview of the menu
## 5 Late Adopter Order accuracy
## 6 Early Adopter Shorter waiting time
## 7 Early Adopter Easier payment process
## 8 Early Adopter Better overview of the menu
## 9 Early Adopter Shorter waiting time
## 10 Early Adopter Easier payment process
# Count every answer per segment, then express each count as a share of all
# answers given within that segment (the result stays grouped by segment)
benefits_summary <- benefits_long %>%
  count(adopter_segment, benefits_digital_ordering) %>%
  group_by(adopter_segment) %>%
  mutate(percent = n / sum(n) * 100)
# Show the resulting table
benefits_summary
## # A tibble: 15 × 4
## # Groups: adopter_segment [2]
## adopter_segment benefits_digital_ordering n percent
## <fct> <chr> <int> <dbl>
## 1 Early Adopter Better overview of the menu 32 19.5
## 2 Early Adopter Didnt order through an app 1 0.610
## 3 Early Adopter Easier payment process 38 23.2
## 4 Early Adopter Easily finding dietary and allergen information 6 3.66
## 5 Early Adopter Less interaction needed with staff 26 15.9
## 6 Early Adopter N/a 1 0.610
## 7 Early Adopter No benefits 1 0.610
## 8 Early Adopter Order accuracy 13 7.93
## 9 Early Adopter Shorter waiting time 46 28.0
## 10 Late Adopter Better overview of the menu 11 26.8
## 11 Late Adopter Easier payment process 4 9.76
## 12 Late Adopter Easily finding dietary and allergen information 1 2.44
## 13 Late Adopter Less interaction needed with staff 8 19.5
## 14 Late Adopter Order accuracy 3 7.32
## 15 Late Adopter Shorter waiting time 14 34.1
library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)
# ✅ Define valid predefined benefits from Q13
valid_benefits <- c(
  "Shorter waiting time",
  "Easier payment process",
  "Less interaction needed with staff",
  "Better overview of the menu",
  "Easily finding dietary and allergen information",
  "Order accuracy",
  "No benefits" # keep this as a valid option
)

# ✅ Expand multi-select benefits: one row per respondent and selected option
benefits_long <- survey %>%
  separate_rows(benefits_digital_ordering, sep = ";") %>%
  mutate(benefits_digital_ordering = str_trim(benefits_digital_ordering))

# ✅ Keep ONLY valid predefined benefits (drops free-text answers)
benefits_clean <- benefits_long %>%
  filter(benefits_digital_ordering %in% valid_benefits)

# ✅ Sort benefits by overall frequency (most frequent first)
benefits_order <- benefits_clean %>%
  count(benefits_digital_ordering, name = "total_count") %>%
  arrange(desc(total_count)) %>%
  pull(benefits_digital_ordering)

# ✅ Base for the percentages: respondents per segment who answered the
# question. Because the question is multi-select, dividing by the number of
# selections (as before) gives the share of mentions, not the share of
# respondents that the axis label promises.
benefits_respondents <- survey %>%
  filter(!is.na(benefits_digital_ordering)) %>%
  count(adopter_segment, name = "n_respondents")

# ✅ Calculate the percentage of respondents per segment choosing each benefit
benefits_summary <- benefits_clean %>%
  mutate(
    benefits_digital_ordering = factor(
      benefits_digital_ordering,
      levels = benefits_order
    )
  ) %>%
  count(adopter_segment, benefits_digital_ordering, name = "count") %>%
  # Add explicit zero rows for options a segment never chose; otherwise the
  # remaining dodged bar is drawn at double width
  complete(
    adopter_segment,
    benefits_digital_ordering,
    fill = list(count = 0L)
  ) %>%
  left_join(benefits_respondents, by = "adopter_segment") %>%
  mutate(percentage = count / n_respondents * 100)

# ✅ Minimalistic colors (matched to the segment levels in order)
minimal_colors <- c("#FDBE85", "#74A9CF")

# ✅ Plot only clean predefined benefits
ggplot(
  benefits_summary,
  aes(
    x = percentage,
    y = benefits_digital_ordering,
    fill = adopter_segment
  )
) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = minimal_colors) +
  labs(
    title = "Benefits of Digital Ordering",
    x = "Percentage of respondents",
    y = "Benefits",
    fill = "Segment"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    legend.text = element_text(size = 11),
    legend.position = "top",
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank()
  )
# ✅ Define valid predefined concerns from Q18
valid_concerns <- c(
  "Data privacy",
  "Technical problems",
  "Less interaction with staff",
  "Too complicated to use",
  "No concerns"
)

# ✅ Expand multi-select concerns: one row per respondent and selected option
concerns_long <- survey %>%
  separate_rows(concerns_unified_app, sep = ";") %>%
  mutate(concerns_unified_app = str_trim(concerns_unified_app))

# ✅ Keep ONLY valid predefined concerns (drops free-text answers)
concerns_clean <- concerns_long %>%
  filter(concerns_unified_app %in% valid_concerns)

# ✅ Sort concerns by overall frequency (most frequent first)
concerns_order <- concerns_clean %>%
  count(concerns_unified_app, name = "total_count") %>%
  arrange(desc(total_count)) %>%
  pull(concerns_unified_app)

# ✅ Base for the percentages: respondents per segment who answered the
# question. Because the question is multi-select, dividing by the number of
# selections (as before) gives the share of mentions, not the share of
# respondents that the axis label promises.
concerns_respondents <- survey %>%
  filter(!is.na(concerns_unified_app)) %>%
  count(adopter_segment, name = "n_respondents")

# ✅ Calculate the percentage of respondents per segment naming each concern
concerns_summary <- concerns_clean %>%
  mutate(
    concerns_unified_app = factor(
      concerns_unified_app,
      levels = concerns_order
    )
  ) %>%
  count(adopter_segment, concerns_unified_app, name = "count") %>%
  # Add explicit zero rows for options a segment never chose; otherwise the
  # remaining dodged bar is drawn at double width
  complete(
    adopter_segment,
    concerns_unified_app,
    fill = list(count = 0L)
  ) %>%
  left_join(concerns_respondents, by = "adopter_segment") %>%
  mutate(percentage = count / n_respondents * 100)

# ✅ Minimalistic colors (matched to the segment levels in order)
minimal_colors <- c("#FDBE85", "#74A9CF")

# ✅ Plot only clean predefined concerns
ggplot(
  concerns_summary,
  aes(
    x = percentage,
    y = concerns_unified_app,
    fill = adopter_segment
  )
) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = minimal_colors) +
  labs(
    # "Unified App" matches the wording of the other plot titles
    title = "Concerns about Using a Unified App",
    x = "Percentage of respondents",
    y = "Concerns",
    fill = "Segment"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    legend.text = element_text(size = 11),
    legend.position = "top",
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank()
  )
#' Plot the answer distribution of one survey question by adopter segment
#'
#' For every adopter segment, computes the percentage of respondents giving
#' each answer and draws the segments as dodged bars.
#'
#' @param data A data frame containing an `adopter_segment` column and the
#'   column named by `column`.
#' @param column Name of the answer column, as a single string.
#' @param question_title Plot title.
#' @return A ggplot object.
plot_likert_by_adopter_percent <- function(data, column, question_title) {
  stopifnot(
    is.data.frame(data),
    is.character(column),
    length(column) == 1L,
    all(c("adopter_segment", column) %in% names(data))
  )

  response_share <- data %>%
    # Unanswered questions should not count towards the percentage base
    filter(!is.na(.data[[column]])) %>%
    # Treat answers as categories so numeric Likert codes get one tick each
    mutate(response = as.factor(.data[[column]])) %>%
    count(adopter_segment, response, name = "count") %>%
    # Explicit zero rows keep all dodged bars the same width when a segment
    # never gave a particular answer
    complete(adopter_segment, response, fill = list(count = 0L)) %>%
    group_by(adopter_segment) %>% # percentage within each adopter group
    mutate(percent = count / sum(count) * 100) %>%
    ungroup()

  ggplot(
    response_share,
    aes(x = response, y = percent, fill = adopter_segment)
  ) +
    geom_col(position = "dodge") +
    scale_fill_manual(values = c("#FDBE85", "#74A9CF")) +
    labs(
      title = question_title,
      x = "Response",
      y = "Percentage of Respondents",
      fill = "Segment"
    ) +
    theme_minimal(base_size = 13) +
    theme(
      axis.text.x = element_text(angle = 0, size = 11),
      legend.position = "top"
    )
}
# Answer distribution by adopter segment for the unified-app questions

# Perceived usefulness
plot_likert_by_adopter_percent(
  data = survey,
  column = "usefulness_unified_app",
  question_title = "Q15: Usefulness of a Unified App"
)

# Perceived importance
plot_likert_by_adopter_percent(
  data = survey,
  column = "importance_unified_app",
  question_title = "Q16: Importance of a Unified App"
)

# Effect on dining-out frequency
plot_likert_by_adopter_percent(
  data = survey,
  column = "more_likely_dine_out",
  question_title = "Q17: Would a Unified App Make You Dine Out More?"
)

# Willingness to pay
plot_likert_by_adopter_percent(
  data = survey,
  column = "willingness_unified_app",
  question_title = "Q19: Willingness to Pay €1/month for a Unified App"
)