# Load libraries
library(readr)       # for reading CSVs
library(dplyr)       # for data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)     # for plotting
library(ggpubr)      # for easier p-value reporting
# Absolute path to the cleaned survey CSV
csv_path <- "C:/Users/Lenovo/Downloads/thesis/survey analysis/Data/Survey_clean.csv"

# Read the responses into `survey`, decoding the file as UTF-8
survey <- readr::read_csv(
  file = csv_path,
  locale = readr::locale(encoding = "UTF-8")
)
## Rows: 97 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): dining_frequency, age_group, dining_companions, avg_spend_per_meal...
## dbl  (9): person, comfort_info_apps, comfort_communication_apps, comfort_ord...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the first three responses as a quick sanity check of the import
survey %>%
  head(n = 3)
## # A tibble: 3 × 21
##   person dining_frequency        age_group dining_companions  avg_spend_per_meal
##    <dbl> <chr>                   <chr>     <chr>              <chr>             
## 1      1 Several times per week  25-34     Alone;With one fr… Less than €10     
## 2      2 Several times per week  25-34     Alone;With one fr… €10 - €20         
## 3      3 Several times per month 25-34     With one friend o… €10 - €20         
## # ℹ 16 more variables: dining_types <chr>, comfort_info_apps <dbl>,
## #   comfort_communication_apps <dbl>, comfort_ordering_apps <dbl>,
## #   comfort_payment_apps <dbl>, comfort_entertainment_apps <dbl>,
## #   used_digital_ordering <chr>, rating_digital_experience <dbl>,
## #   benefits_digital_ordering <chr>, problems_digital_ordering <chr>,
## #   usefulness_unified_app <dbl>, importance_unified_app <dbl>,
## #   more_likely_dine_out <chr>, concerns_unified_app <chr>, …
# Digital comfort score and adopter segmentation ----

# The five comfort items (Q6 to Q10), each answered on a 1–5 scale, so a
# complete response sums to between 5 and 25.
comfort_items <- c(
  "comfort_info_apps",
  "comfort_communication_apps",
  "comfort_ordering_apps",
  "comfort_payment_apps",
  "comfort_entertainment_apps"
)

# Minimum summed score for the "Early Adopter" label (an average of 4 out of
# 5 across the five items).
early_adopter_cutoff <- 20

# The sum below uses na.rm = TRUE, which counts a skipped item as 0 and can
# therefore only pull a respondent towards "Late Adopter". Surface that
# instead of letting it pass silently; the computed scores are unchanged.
n_incomplete <- sum(!complete.cases(survey[comfort_items]))
if (n_incomplete > 0) {
  warning(
    n_incomplete, " respondent(s) have missing comfort items; ",
    "missing items count as 0 in digital_comfort_score.",
    call. = FALSE
  )
}

survey <- survey %>%
  mutate(
    # Add up the comfort items row by row
    digital_comfort_score = rowSums(
      across(all_of(comfort_items)),
      na.rm = TRUE
    ),
    # Two-level factor with "Early Adopter" first, so tables and plots list
    # the segments in that order
    adopter_segment = factor(
      if_else(
        digital_comfort_score >= early_adopter_cutoff,
        "Early Adopter",
        "Late Adopter"
      ),
      levels = c("Early Adopter", "Late Adopter")
    )
  )

# Check distribution
table(survey$adopter_segment)
## 
## Early Adopter  Late Adopter 
##            76            21
# Q15 – does the usefulness rating of a unified app differ between segments?
# Two-sided Wilcoxon rank-sum (Mann–Whitney U) test of the rating by segment.
survey %>%
  wilcox.test(usefulness_unified_app ~ adopter_segment, data = .)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  usefulness_unified_app by adopter_segment
## W = 924.5, p-value = 0.2348
## alternative hypothesis: true location shift is not equal to 0
# Q16 – does the importance of ordering through a unified app differ between
# segments? Two-sided Wilcoxon rank-sum (Mann–Whitney U) test by segment.
survey %>%
  wilcox.test(importance_unified_app ~ adopter_segment, data = .)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  importance_unified_app by adopter_segment
## W = 1046, p-value = 0.02402
## alternative hypothesis: true location shift is not equal to 0
# Load libraries
library(ggplot2)
library(dplyr)

# Box plots of the Q15/Q16 ratings by adopter segment ----

# Minimalist color palette, keyed by factor level (orange for Early, blue for
# Late) so the mapping stays correct even if the level order changes.
minimal_colors <- c("Early Adopter" = "#FDAE6B", "Late Adopter" = "#74A9CF")

# Convert adopter_segment to factor (just in case)
survey$adopter_segment <- factor(
  survey$adopter_segment,
  levels = c("Early Adopter", "Late Adopter")
)

# Format a p-value for a plot label with three decimals, e.g. "p = 0.235".
# Values below 0.001 are shown as "p < 0.001" rather than "p = 0.000".
format_p_label <- function(p_value) {
  if (is.na(p_value)) {
    return("p = NA")
  }
  if (p_value < 0.001) {
    return("p < 0.001")
  }
  sprintf("p = %.3f", p_value)
}

# Box plot of a rating by adopter segment, with jittered raw points and an
# annotation carrying the p-value of a Wilcoxon rank-sum (Mann–Whitney U)
# test. The p-value is computed from `data` here, so the label cannot drift
# out of sync with the data the way a hand-typed value can.
#
# Arguments:
#   data        Data frame with an `adopter_segment` factor and the rating.
#   rating_col  Name of the rating column, as a string.
#   title       Plot title.
#   y_label     Y-axis label.
#   label_y     Vertical position of the annotation; the default of 5.3 sits
#               just above the top of a 1–5 scale.
#
# Returns the ggplot object, which is drawn when printed at top level.
plot_rating_by_segment <- function(data, rating_col, title, y_label,
                                   label_y = 5.3) {
  # Same test as `wilcox.test(<rating> ~ adopter_segment, data = data)`
  segment_test <- wilcox.test(
    reformulate("adopter_segment", response = rating_col),
    data = data
  )
  test_label <- paste0(
    "Mann–Whitney U test\n",
    format_p_label(segment_test$p.value)
  )

  ggplot(
    data,
    aes(x = adopter_segment, y = .data[[rating_col]], fill = adopter_segment)
  ) +
    # Outliers are hidden on the box because every point is drawn by the
    # jitter layer below
    geom_boxplot(width = 0.6, outlier.shape = NA, alpha = 0.9) +
    geom_jitter(width = 0.15, alpha = 0.4, size = 1.8) +
    scale_fill_manual(values = minimal_colors) +
    labs(title = title, x = "Adopter Segment", y = y_label) +
    # x = 1.5 centres the label between the two segment boxes
    annotate(
      "text",
      x = 1.5, y = label_y, label = test_label, size = 3.5, hjust = 0.5
    ) +
    theme_minimal(base_size = 13) +
    theme(legend.position = "none")
}

# Box plot for Q15 – Usefulness
plot_rating_by_segment(
  survey,
  rating_col = "usefulness_unified_app",
  title = "Perceived Usefulness of a Unified App by Adopter Segment",
  y_label = "Usefulness Rating (1–5)"
)

# Box plot for Q16 – Importance
plot_rating_by_segment(
  survey,
  rating_col = "importance_unified_app",
  title = "Importance of Ordering via Unified App by Adopter Segment",
  y_label = "Importance Rating (1–5)"
)

# Q17: Would universal app increase likelihood of dining out?
# Cross-tabulate the Q17 answer against adopter segment.
table_17 <- table(
  more_likely_dine_out = survey$more_likely_dine_out,
  adopter_segment = survey$adopter_segment
)

# An earlier chisq.test(table_17) run gave X-squared = 0.27174, df = 2,
# p-value = 0.873, but with the warning "Chi-squared approximation may be
# incorrect", so that p-value is not reliable. Fisher's exact test does not
# depend on the large-sample approximation and is used instead.
fisher.test(table_17)
# Q19: Willingness to pay €1/month
# Cross-tabulate the Q19 answer against adopter segment.
table_19 <- table(
  willingness_unified_app = survey$willingness_unified_app,
  adopter_segment = survey$adopter_segment
)

# An earlier chisq.test(table_19) run gave X-squared = 1.1984, df = 2,
# p-value = 0.5493, but with the warning "Chi-squared approximation may be
# incorrect", so that p-value is not reliable. Fisher's exact test does not
# depend on the large-sample approximation and is used instead.
fisher.test(table_19)