library(dplyr)
library(tidyr)
library(tidyverse)
library(janitor)
library(lubridate)
library(psych)


# ------------------------------------------------------------
# Q1 – Coffee shop visits in past month
# ------------------------------------------------------------
coffee_clean <- read.csv("coffee_clean.csv")

q1_visits_freq <- coffee_clean %>%
  count(q1_visits_month, sort = TRUE)

q1_visits_summary <- coffee_clean %>%
  summarize(
    mean_visits = mean(q1_visits_month_num, na.rm = TRUE),
    sd_visits   = sd(q1_visits_month_num, na.rm = TRUE),
    min_visits  = min(q1_visits_month_num, na.rm = TRUE),
    max_visits  = max(q1_visits_month_num, na.rm = TRUE)
  )

print(q1_visits_freq)
##    q1_visits_month  n
## 1                1 31
## 2                0 12
## 3                3  9
## 4                6  7
## 5                2  6
## 6                4  6
## 7                5  5
## 8                9  4
## 9                8  3
## 10              10  3
## 11              12  3
## 12              15  3
## 13              20  2
## 14              25  2
## 15               7  1
## 16              13  1
## 17              16  1
## 18              21  1
## 19              22  1
print(q1_visits_summary)
##   mean_visits sd_visits min_visits max_visits
## 1    4.960396  5.912564          0         25
# ------------------------------------------------------------
# Q2 – Brand ranking (1 = highest rank, 6 = lowest)
# ------------------------------------------------------------

q2_rank_long <- coffee_clean %>%
  select(starts_with("q2_")) %>%
  pivot_longer(
    everything(),
    names_to = "brand",
    values_to = "rank"
  )

q2_rank_freq <- q2_rank_long %>%
  count(brand, rank)

q2_rank_mean <- q2_rank_long %>%
  group_by(brand) %>%
  summarize(
    mean_rank = mean(as.numeric(rank), na.rm = TRUE),
    n         = sum(!is.na(rank)),
    .groups   = "drop"
  ) %>%
  arrange(mean_rank)

print(q2_rank_freq)
## # A tibble: 36 × 3
##    brand            rank     n
##    <chr>           <int> <int>
##  1 q2_rank_caribou     1     5
##  2 q2_rank_caribou     2     9
##  3 q2_rank_caribou     3    19
##  4 q2_rank_caribou     4    31
##  5 q2_rank_caribou     5    20
##  6 q2_rank_caribou     6    17
##  7 q2_rank_dunkin      1     9
##  8 q2_rank_dunkin      2    35
##  9 q2_rank_dunkin      3    17
## 10 q2_rank_dunkin      4    13
## # ℹ 26 more rows
print(q2_rank_mean)
## # A tibble: 6 × 3
##   brand             mean_rank     n
##   <chr>                 <dbl> <int>
## 1 q2_rank_starbucks      2.55   101
## 2 q2_rank_local          2.75   101
## 3 q2_rank_dunkin         3.31   101
## 4 q2_rank_dutchbros      3.79   101
## 5 q2_rank_caribou        4.02   101
## 6 q2_rank_peets          4.57   101
# ------------------------------------------------------------
# Q3 – Experience qualities importance (1 = most important, 5 = least)
# ------------------------------------------------------------

q3_rank_long <- coffee_clean %>%
  select(starts_with("q3_")) %>%
  pivot_longer(
    everything(),
    names_to = "quality",
    values_to = "rank"
  )

q3_rank_freq <- q3_rank_long %>%
  count(quality, rank)

q3_rank_mean <- q3_rank_long %>%
  group_by(quality) %>%
  summarize(
    mean_rank = mean(as.numeric(rank), na.rm = TRUE),
    n         = sum(!is.na(rank)),
    .groups   = "drop"
  ) %>%
  arrange(mean_rank)

print(q3_rank_freq)
## # A tibble: 30 × 3
##    quality                rank     n
##    <chr>                 <int> <int>
##  1 q3_rank_comfy_seating     1     4
##  2 q3_rank_comfy_seating     2     6
##  3 q3_rank_comfy_seating     3    16
##  4 q3_rank_comfy_seating     4    19
##  5 q3_rank_comfy_seating     5    55
##  6 q3_rank_comfy_seating    NA     1
##  7 q3_rank_fast_order        1    12
##  8 q3_rank_fast_order        2    22
##  9 q3_rank_fast_order        3    38
## 10 q3_rank_fast_order        4    19
## # ℹ 20 more rows
print(q3_rank_mean)
## # A tibble: 5 × 3
##   quality                mean_rank     n
##   <chr>                      <dbl> <int>
## 1 q3_rank_high_quality        1.71   100
## 2 q3_rank_fast_order          2.91   100
## 3 q3_rank_variety_drinks      2.92   100
## 4 q3_rank_friendly_staff      3.31   100
## 5 q3_rank_comfy_seating       4.15   100
# ------------------------------------------------------------
# Q4 – Starbucks activities ranking (1 = most often, 4 = least)
# ------------------------------------------------------------

q4_rank_long <- coffee_clean %>%
  select(starts_with("q4_")) %>%
  pivot_longer(
    everything(),
    names_to = "activity",
    values_to = "rank"
  )

q4_rank_freq <- q4_rank_long %>%
  count(activity, rank)

q4_rank_mean <- q4_rank_long %>%
  group_by(activity) %>%
  summarize(
    mean_rank = mean(as.numeric(rank), na.rm = TRUE),
    n         = sum(!is.na(rank)),
    .groups   = "drop"
  ) %>%
  arrange(mean_rank)

print(q4_rank_freq)
## # A tibble: 20 × 3
##    activity               rank     n
##    <chr>                 <int> <int>
##  1 q4_rank_drive_through     1    42
##  2 q4_rank_drive_through     2    21
##  3 q4_rank_drive_through     3    14
##  4 q4_rank_drive_through     4    23
##  5 q4_rank_drive_through    NA     1
##  6 q4_rank_grab_and_go       1    32
##  7 q4_rank_grab_and_go       2    51
##  8 q4_rank_grab_and_go       3    11
##  9 q4_rank_grab_and_go       4     6
## 10 q4_rank_grab_and_go      NA     1
## 11 q4_rank_meet_others       1    12
## 12 q4_rank_meet_others       2    13
## 13 q4_rank_meet_others       3    33
## 14 q4_rank_meet_others       4    42
## 15 q4_rank_meet_others      NA     1
## 16 q4_rank_work_study        1    14
## 17 q4_rank_work_study        2    15
## 18 q4_rank_work_study        3    42
## 19 q4_rank_work_study        4    29
## 20 q4_rank_work_study       NA     1
print(q4_rank_mean)
## # A tibble: 4 × 3
##   activity              mean_rank     n
##   <chr>                     <dbl> <int>
## 1 q4_rank_grab_and_go        1.91   100
## 2 q4_rank_drive_through      2.18   100
## 3 q4_rank_work_study         2.86   100
## 4 q4_rank_meet_others        3.05   100
# ------------------------------------------------------------
# Q5 – Time in shop (ordered categorical)
# ------------------------------------------------------------

q5_time_freq <- coffee_clean %>%
  count(q5_time_in_shop) %>%
  mutate(prop = n / sum(n))

print(q5_time_freq)
##        q5_time_in_shop  n       prop
## 1        10-30 minutes 26 0.25742574
## 2        31-60 minutes 16 0.15841584
## 3 Less than 10 minutes 58 0.57425743
## 4    More than 2 hours  1 0.00990099
# ------------------------------------------------------------
# Q6 – Personalization agreement (Likert 1–5)
# ------------------------------------------------------------

q6_agree_freq <- coffee_clean %>%
  count(q6_personalization_agree) %>%
  mutate(prop = n / sum(n))

q6_agree_summary <- coffee_clean %>%
  summarize(
    mean_agree = mean(q6_agree_num, na.rm = TRUE),
    sd_agree   = sd(q6_agree_num, na.rm = TRUE)
  )

print(q6_agree_freq)
##     q6_personalization_agree  n       prop
## 1 Neither agree nor disagree 32 0.31683168
## 2             Somewhat agree 19 0.18811881
## 3          Somewhat disagree 28 0.27722772
## 4             Strongly agree  5 0.04950495
## 5          Strongly disagree 16 0.15841584
## 6                       <NA>  1 0.00990099
print(q6_agree_summary)
##   mean_agree sd_agree
## 1       2.69 1.107322
# ------------------------------------------------------------
# Q7 – Semantic differentials (Impersonal–Personal etc.)
# ------------------------------------------------------------

q7_long <- coffee_clean %>%
  select(starts_with("q7_")) %>%
  pivot_longer(
    everything(),
    names_to = "dimension",
    values_to = "score"
  )

q7_freq <- q7_long %>%
  count(dimension, score)

q7_summary <- q7_long %>%
  group_by(dimension) %>%
  summarize(
    mean_score = mean(as.numeric(score), na.rm = TRUE),
    sd_score   = sd(as.numeric(score), na.rm = TRUE),
    n          = sum(!is.na(score)),
    .groups    = "drop"
  )

print(q7_freq)
## # A tibble: 18 × 3
##    dimension                  score     n
##    <chr>                      <int> <int>
##  1 q7_functional_experiential     1    11
##  2 q7_functional_experiential     2    25
##  3 q7_functional_experiential     3    34
##  4 q7_functional_experiential     4    15
##  5 q7_functional_experiential     5    15
##  6 q7_functional_experiential    NA     1
##  7 q7_impersonal_personal         1     6
##  8 q7_impersonal_personal         2    20
##  9 q7_impersonal_personal         3    39
## 10 q7_impersonal_personal         4    23
## 11 q7_impersonal_personal         5    12
## 12 q7_impersonal_personal        NA     1
## 13 q7_rigid_authentic             1     4
## 14 q7_rigid_authentic             2    17
## 15 q7_rigid_authentic             3    34
## 16 q7_rigid_authentic             4    29
## 17 q7_rigid_authentic             5    16
## 18 q7_rigid_authentic            NA     1
print(q7_summary)
## # A tibble: 3 × 4
##   dimension                  mean_score sd_score     n
##   <chr>                           <dbl>    <dbl> <int>
## 1 q7_functional_experiential       2.98     1.21   100
## 2 q7_impersonal_personal           3.15     1.07   100
## 3 q7_rigid_authentic               3.36     1.07   100
# ------------------------------------------------------------
# Q8 – NPS (0–10) + NPS group
# ------------------------------------------------------------

q8_nps_group_freq <- coffee_clean %>%
  count(q8_nps_group) %>%
  mutate(prop = n / sum(n))

q8_nps_summary <- coffee_clean %>%
  summarize(
    mean_nps = mean(q8_nps_score_num, na.rm = TRUE),
    sd_nps   = sd(q8_nps_score_num, na.rm = TRUE),
    min_nps  = min(q8_nps_score_num, na.rm = TRUE),
    max_nps  = max(q8_nps_score_num, na.rm = TRUE)
  )

print(q8_nps_group_freq)
##   q8_nps_group  n      prop
## 1    Detractor 53 0.5247525
## 2      Passive 27 0.2673267
## 3     Promoter 21 0.2079208
print(q8_nps_summary)
##   mean_nps  sd_nps min_nps max_nps
## 1 5.940594 2.81006       0      10
# ------------------------------------------------------------
# Q9 – Standout initiative (categorical) + open-ended
# ------------------------------------------------------------

# Split comma-separated multi-select responses into one row per selection
q9_long <- coffee_clean %>%
  mutate(resp_id = row_number()) %>%                 # use row as respondent id (replace if you have a real id)
  filter(!is.na(q9_standout), q9_standout != "") %>%
  separate_rows(q9_standout, sep = ",") %>%
  mutate(q9_standout = str_trim(q9_standout))

# (A) Share of respondents selecting each initiative (recommended)
q9_standout_by_respondent <- q9_long %>%
  distinct(resp_id, q9_standout) %>%
  count(q9_standout, sort = TRUE) %>%
  mutate(
    prop_respondents = n / n_distinct(q9_long$resp_id)
  )

print(q9_standout_by_respondent)
## # A tibble: 5 × 3
##   q9_standout                                                 n prop_respondents
##   <chr>                                                   <int>            <dbl>
## 1 The app experience                                         34            0.337
## 2 Free refill policy                                         29            0.287
## 3 The new pickup experience with individualized messages…    29            0.287
## 4 Change in ambience                                         27            0.267
## 5 Other (please specify)                                     13            0.129
# (B) Share of total selections (optional)
q9_standout_by_selection <- q9_long %>%
  count(q9_standout, sort = TRUE) %>%
  mutate(prop_selections = n / sum(n))

print(q9_standout_by_selection)
## # A tibble: 5 × 3
##   q9_standout                                                  n prop_selections
##   <chr>                                                    <int>           <dbl>
## 1 The app experience                                          34          0.258 
## 2 Free refill policy                                          29          0.220 
## 3 The new pickup experience with individualized messages …    29          0.220 
## 4 Change in ambience                                          27          0.205 
## 5 Other (please specify)                                      13          0.0985
# Open-ended "Other" text: show non-empty responses from respondents who selected Other
cat("\nNon-empty 'Other' text responses (if any):\n")
## 
## Non-empty 'Other' text responses (if any):
q9_other_text_nonempty <- coffee_clean %>%
  mutate(resp_id = row_number()) %>%
  filter(str_detect(q9_standout, "Other")) %>%
  select(resp_id, q9_other_text) %>%
  filter(!is.na(q9_other_text), q9_other_text != "")

print(q9_other_text_nonempty)
##    resp_id
## 1       23
## 2       26
## 3       29
## 4       31
## 5       35
## 6       40
## 7       42
## 8       51
## 9       79
## 10      87
## 11      94
## 12      96
##                                                                        q9_other_text
## 1                                                                               None
## 2                  Haven't really paid attention, I just get what I need and get out
## 3                                                                       No Raspberry
## 4                                               unfamiliar with any of these changes
## 5                                                                               none
## 6                                                                               None
## 7                                                                               none
## 8  Cup utilization by employees and company (The non-PC stuff, like bears or racism)
## 9                                                               The cuts to the menu
## 10                                                                              none
## 11                                                                    Didn't notice.
## 12                                                    that the ceo is a union buster
# ------------------------------------------------------------
# Q10 – Awareness of 'Hello Again'
# ------------------------------------------------------------

q10_hello_freq <- coffee_clean %>%
  count(q10_hello_again) %>%
  mutate(prop = n / sum(n))

print(q10_hello_freq)
##   q10_hello_again  n      prop
## 1              No 72 0.7128713
## 2             Yes 29 0.2871287
# ------------------------------------------------------------
# Q11 – Gender
# ------------------------------------------------------------

q11_gender_freq <- coffee_clean %>%
  count(q11_gender) %>%
  mutate(prop = n / sum(n))

print(q11_gender_freq)
##          q11_gender  n       prop
## 1            Female 56 0.55445545
## 2              Male 44 0.43564356
## 3 Prefer not to say  1 0.00990099
# ------------------------------------------------------------
# Q12 – Age
# ------------------------------------------------------------

q12_age_summary <- coffee_clean %>%
  summarize(
    mean_age = mean(q12_age_num, na.rm = TRUE),
    sd_age   = sd(q12_age_num, na.rm = TRUE),
    min_age  = min(q12_age_num, na.rm = TRUE),
    max_age  = max(q12_age_num, na.rm = TRUE)
  )

print(q12_age_summary)
##   mean_age   sd_age min_age max_age
## 1    40.86 11.59765      21      74