library(tidyverse)   # attaches dplyr, tidyr, stringr, readr, ggplot2, ...
library(lubridate)
library(janitor)
library(psych)
# ------------------------------------------------------------
# Q1 – Coffee shop visits in past month
# ------------------------------------------------------------
coffee_clean <- read.csv("coffee_clean.csv")
q1_visits_freq <- coffee_clean %>%
  count(q1_visits_month, sort = TRUE)
q1_visits_summary <- coffee_clean %>%
  summarize(
    mean_visits = mean(q1_visits_month_num, na.rm = TRUE),
    sd_visits   = sd(q1_visits_month_num, na.rm = TRUE),
    min_visits  = min(q1_visits_month_num, na.rm = TRUE),
    max_visits  = max(q1_visits_month_num, na.rm = TRUE)
  )
print(q1_visits_freq)
## q1_visits_month n
## 1 1 31
## 2 0 12
## 3 3 9
## 4 6 7
## 5 2 6
## 6 4 6
## 7 5 5
## 8 9 4
## 9 8 3
## 10 10 3
## 11 12 3
## 12 15 3
## 13 20 2
## 14 25 2
## 15 7 1
## 16 13 1
## 17 16 1
## 18 21 1
## 19 22 1
print(q1_visits_summary)
## mean_visits sd_visits min_visits max_visits
## 1 4.960396 5.912564 0 25
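# Given the long right tail above (mean about 4.96 but max 25), a median/IQR
# summary may describe the typical respondent better. Minimal sketch, reusing
# the same q1_visits_month_num variable:
q1_visits_robust <- coffee_clean %>%
  summarize(
    median_visits = median(q1_visits_month_num, na.rm = TRUE),
    iqr_visits    = IQR(q1_visits_month_num, na.rm = TRUE)
  )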
# ------------------------------------------------------------
# Q2 – Brand ranking (1 = highest rank, 6 = lowest)
# ------------------------------------------------------------
q2_rank_long <- coffee_clean %>%
  select(starts_with("q2_")) %>%
  pivot_longer(
    everything(),
    names_to = "brand",
    values_to = "rank"
  )
q2_rank_freq <- q2_rank_long %>%
  count(brand, rank)
q2_rank_mean <- q2_rank_long %>%
  group_by(brand) %>%
  summarize(
    mean_rank = mean(as.numeric(rank), na.rm = TRUE),
    n = sum(!is.na(rank)),
    .groups = "drop"
  ) %>%
  arrange(mean_rank)
print(q2_rank_freq)
## # A tibble: 36 × 3
## brand rank n
## <chr> <int> <int>
## 1 q2_rank_caribou 1 5
## 2 q2_rank_caribou 2 9
## 3 q2_rank_caribou 3 19
## 4 q2_rank_caribou 4 31
## 5 q2_rank_caribou 5 20
## 6 q2_rank_caribou 6 17
## 7 q2_rank_dunkin 1 9
## 8 q2_rank_dunkin 2 35
## 9 q2_rank_dunkin 3 17
## 10 q2_rank_dunkin 4 13
## # ℹ 26 more rows
print(q2_rank_mean)
## # A tibble: 6 × 3
## brand mean_rank n
## <chr> <dbl> <int>
## 1 q2_rank_starbucks 2.55 101
## 2 q2_rank_local 2.75 101
## 3 q2_rank_dunkin 3.31 101
## 4 q2_rank_dutchbros 3.79 101
## 5 q2_rank_caribou 4.02 101
## 6 q2_rank_peets 4.57 101
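# Optional sanity check (sketch): confirm each respondent assigned every rank 1-6
# exactly once before leaning on the means above; respondents with missing or
# repeated ranks show up as valid = FALSE.
q2_rank_check <- coffee_clean %>%
  mutate(resp_id = row_number()) %>%
  select(resp_id, starts_with("q2_")) %>%
  pivot_longer(-resp_id, names_to = "brand", values_to = "rank") %>%
  group_by(resp_id) %>%
  summarize(valid = identical(sort(as.integer(rank)), 1:6), .groups = "drop") %>%
  count(valid)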
# ------------------------------------------------------------
# Q3 – Experience qualities importance (1 = most important, 5 = least)
# ------------------------------------------------------------
q3_rank_long <- coffee_clean %>%
  select(starts_with("q3_")) %>%
  pivot_longer(
    everything(),
    names_to = "quality",
    values_to = "rank"
  )
q3_rank_freq <- q3_rank_long %>%
  count(quality, rank)
q3_rank_mean <- q3_rank_long %>%
  group_by(quality) %>%
  summarize(
    mean_rank = mean(as.numeric(rank), na.rm = TRUE),
    n = sum(!is.na(rank)),
    .groups = "drop"
  ) %>%
  arrange(mean_rank)
print(q3_rank_freq)
## # A tibble: 30 × 3
## quality rank n
## <chr> <int> <int>
## 1 q3_rank_comfy_seating 1 4
## 2 q3_rank_comfy_seating 2 6
## 3 q3_rank_comfy_seating 3 16
## 4 q3_rank_comfy_seating 4 19
## 5 q3_rank_comfy_seating 5 55
## 6 q3_rank_comfy_seating NA 1
## 7 q3_rank_fast_order 1 12
## 8 q3_rank_fast_order 2 22
## 9 q3_rank_fast_order 3 38
## 10 q3_rank_fast_order 4 19
## # ℹ 20 more rows
print(q3_rank_mean)
## # A tibble: 5 × 3
## quality mean_rank n
## <chr> <dbl> <int>
## 1 q3_rank_high_quality 1.71 100
## 2 q3_rank_fast_order 2.91 100
## 3 q3_rank_variety_drinks 2.92 100
## 4 q3_rank_friendly_staff 3.31 100
## 5 q3_rank_comfy_seating 4.15 100
# ------------------------------------------------------------
# Q4 – Starbucks activities ranking (1 = most often, 4 = least)
# ------------------------------------------------------------
q4_rank_long <- coffee_clean %>%
  select(starts_with("q4_")) %>%
  pivot_longer(
    everything(),
    names_to = "activity",
    values_to = "rank"
  )
q4_rank_freq <- q4_rank_long %>%
  count(activity, rank)
q4_rank_mean <- q4_rank_long %>%
  group_by(activity) %>%
  summarize(
    mean_rank = mean(as.numeric(rank), na.rm = TRUE),
    n = sum(!is.na(rank)),
    .groups = "drop"
  ) %>%
  arrange(mean_rank)
print(q4_rank_freq)
## # A tibble: 20 × 3
## activity rank n
## <chr> <int> <int>
## 1 q4_rank_drive_through 1 42
## 2 q4_rank_drive_through 2 21
## 3 q4_rank_drive_through 3 14
## 4 q4_rank_drive_through 4 23
## 5 q4_rank_drive_through NA 1
## 6 q4_rank_grab_and_go 1 32
## 7 q4_rank_grab_and_go 2 51
## 8 q4_rank_grab_and_go 3 11
## 9 q4_rank_grab_and_go 4 6
## 10 q4_rank_grab_and_go NA 1
## 11 q4_rank_meet_others 1 12
## 12 q4_rank_meet_others 2 13
## 13 q4_rank_meet_others 3 33
## 14 q4_rank_meet_others 4 42
## 15 q4_rank_meet_others NA 1
## 16 q4_rank_work_study 1 14
## 17 q4_rank_work_study 2 15
## 18 q4_rank_work_study 3 42
## 19 q4_rank_work_study 4 29
## 20 q4_rank_work_study NA 1
print(q4_rank_mean)
## # A tibble: 4 × 3
## activity mean_rank n
## <chr> <dbl> <int>
## 1 q4_rank_grab_and_go 1.91 100
## 2 q4_rank_drive_through 2.18 100
## 3 q4_rank_work_study 2.86 100
## 4 q4_rank_meet_others 3.05 100
# ------------------------------------------------------------
# Q5 – Time in shop (ordered categorical)
# ------------------------------------------------------------
q5_time_freq <- coffee_clean %>%
  count(q5_time_in_shop) %>%
  mutate(prop = n / sum(n))
print(q5_time_freq)
## q5_time_in_shop n prop
## 1 10-30 minutes 26 0.25742574
## 2 31-60 minutes 16 0.15841584
## 3 Less than 10 minutes 58 0.57425743
## 4 More than 2 hours 1 0.00990099
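# count() above sorts the categories alphabetically; converting to a factor keeps
# the intended time ordering. Sketch using the labels observed above; add any
# instrument levels not seen in the data (e.g. an intermediate 1-2 hour option)
# if they exist.
time_levels <- c("Less than 10 minutes", "10-30 minutes",
                 "31-60 minutes", "More than 2 hours")
q5_time_freq_ordered <- coffee_clean %>%
  mutate(q5_time_in_shop = factor(q5_time_in_shop, levels = time_levels)) %>%
  count(q5_time_in_shop) %>%
  mutate(prop = n / sum(n))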
# ------------------------------------------------------------
# Q6 – Personalization agreement (Likert 1–5)
# ------------------------------------------------------------
q6_agree_freq <- coffee_clean %>%
  count(q6_personalization_agree) %>%
  mutate(prop = n / sum(n))
q6_agree_summary <- coffee_clean %>%
  summarize(
    mean_agree = mean(q6_agree_num, na.rm = TRUE),
    sd_agree   = sd(q6_agree_num, na.rm = TRUE)
  )
print(q6_agree_freq)
## q6_personalization_agree n prop
## 1 Neither agree nor disagree 32 0.31683168
## 2 Somewhat agree 19 0.18811881
## 3 Somewhat disagree 28 0.27722772
## 4 Strongly agree 5 0.04950495
## 5 Strongly disagree 16 0.15841584
## 6 <NA> 1 0.00990099
print(q6_agree_summary)
## mean_agree sd_agree
## 1 2.69 1.107322
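# For reference: the reported mean of 2.69 is consistent with the usual 1-5 coding
# (1 = Strongly disagree ... 5 = Strongly agree). A sketch of that recode, in case
# q6_agree_num ever needs to be rebuilt (confirm against the codebook):
agree_levels <- c("Strongly disagree", "Somewhat disagree",
                  "Neither agree nor disagree", "Somewhat agree", "Strongly agree")
q6_agree_num_check <- as.integer(factor(coffee_clean$q6_personalization_agree,
                                        levels = agree_levels))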
# ------------------------------------------------------------
# Q7 – Semantic differentials (Impersonal–Personal etc.)
# ------------------------------------------------------------
q7_long <- coffee_clean %>%
  select(starts_with("q7_")) %>%
  pivot_longer(
    everything(),
    names_to = "dimension",
    values_to = "score"
  )
q7_freq <- q7_long %>%
  count(dimension, score)
q7_summary <- q7_long %>%
  group_by(dimension) %>%
  summarize(
    mean_score = mean(as.numeric(score), na.rm = TRUE),
    sd_score = sd(as.numeric(score), na.rm = TRUE),
    n = sum(!is.na(score)),
    .groups = "drop"
  )
print(q7_freq)
## # A tibble: 18 × 3
## dimension score n
## <chr> <int> <int>
## 1 q7_functional_experiential 1 11
## 2 q7_functional_experiential 2 25
## 3 q7_functional_experiential 3 34
## 4 q7_functional_experiential 4 15
## 5 q7_functional_experiential 5 15
## 6 q7_functional_experiential NA 1
## 7 q7_impersonal_personal 1 6
## 8 q7_impersonal_personal 2 20
## 9 q7_impersonal_personal 3 39
## 10 q7_impersonal_personal 4 23
## 11 q7_impersonal_personal 5 12
## 12 q7_impersonal_personal NA 1
## 13 q7_rigid_authentic 1 4
## 14 q7_rigid_authentic 2 17
## 15 q7_rigid_authentic 3 34
## 16 q7_rigid_authentic 4 29
## 17 q7_rigid_authentic 5 16
## 18 q7_rigid_authentic NA 1
print(q7_summary)
## # A tibble: 3 × 4
## dimension mean_score sd_score n
## <chr> <dbl> <dbl> <int>
## 1 q7_functional_experiential 2.98 1.21 100
## 2 q7_impersonal_personal 3.15 1.07 100
## 3 q7_rigid_authentic 3.36 1.07 100
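# psych is loaded above; a quick internal-consistency check on the three Q7 items
# could look like this (sketch; assumes the items are meant to form a single
# "experience" scale and are all scored in the same direction):
q7_items <- coffee_clean %>%
  select(starts_with("q7_")) %>%
  mutate(across(everything(), as.numeric))
psych::alpha(q7_items)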
# ------------------------------------------------------------
# Q8 – NPS (0–10) + NPS group
# ------------------------------------------------------------
q8_nps_group_freq <- coffee_clean %>%
  count(q8_nps_group) %>%
  mutate(prop = n / sum(n))
q8_nps_summary <- coffee_clean %>%
  summarize(
    mean_nps = mean(q8_nps_score_num, na.rm = TRUE),
    sd_nps   = sd(q8_nps_score_num, na.rm = TRUE),
    min_nps  = min(q8_nps_score_num, na.rm = TRUE),
    max_nps  = max(q8_nps_score_num, na.rm = TRUE)
  )
print(q8_nps_group_freq)
## q8_nps_group n prop
## 1 Detractor 53 0.5247525
## 2 Passive 27 0.2673267
## 3 Promoter 21 0.2079208
print(q8_nps_summary)
## mean_nps sd_nps min_nps max_nps
## 1 5.940594 2.81006 0 10
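# Overall Net Promoter Score = % Promoters minus % Detractors, computed here from
# the existing q8_nps_group (the standard banding is Detractor 0-6, Passive 7-8,
# Promoter 9-10; confirm that matches how q8_nps_group was built).
q8_nps_overall <- coffee_clean %>%
  filter(!is.na(q8_nps_group)) %>%
  summarize(nps = 100 * (mean(q8_nps_group == "Promoter") -
                           mean(q8_nps_group == "Detractor")))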
# ------------------------------------------------------------
# Q9 – Standout initiative (categorical) + open-ended
# ------------------------------------------------------------
# Split comma-separated multi-select responses into one row per selection
q9_long <- coffee_clean %>%
  mutate(resp_id = row_number()) %>%   # use row as respondent id (replace if you have a real id)
  filter(!is.na(q9_standout), q9_standout != "") %>%
  separate_rows(q9_standout, sep = ",") %>%
  mutate(q9_standout = str_trim(q9_standout))
# (A) Share of respondents selecting each initiative (recommended)
q9_standout_by_respondent <- q9_long %>%
  distinct(resp_id, q9_standout) %>%
  count(q9_standout, sort = TRUE) %>%
  mutate(
    prop_respondents = n / n_distinct(q9_long$resp_id)
  )
print(q9_standout_by_respondent)
## # A tibble: 5 × 3
## q9_standout n prop_respondents
## <chr> <int> <dbl>
## 1 The app experience 34 0.337
## 2 Free refill policy 29 0.287
## 3 The new pickup experience with individualized messages… 29 0.287
## 4 Change in ambience 27 0.267
## 5 Other (please specify) 13 0.129
# (B) Share of total selections (optional)
q9_standout_by_selection <- q9_long %>%
  count(q9_standout, sort = TRUE) %>%
  mutate(prop_selections = n / sum(n))
print(q9_standout_by_selection)
## # A tibble: 5 × 3
## q9_standout n prop_selections
## <chr> <int> <dbl>
## 1 The app experience 34 0.258
## 2 Free refill policy 29 0.220
## 3 The new pickup experience with individualized messages … 29 0.220
## 4 Change in ambience 27 0.205
## 5 Other (please specify) 13 0.0985
# Open-ended "Other" text: show non-empty responses from respondents who selected Other
cat("\nNon-empty 'Other' text responses (if any):\n")
##
## Non-empty 'Other' text responses (if any):
q9_other_text_nonempty <- coffee_clean %>%
  mutate(resp_id = row_number()) %>%
  filter(str_detect(q9_standout, "Other")) %>%
  select(resp_id, q9_other_text) %>%
  filter(!is.na(q9_other_text), q9_other_text != "")
print(q9_other_text_nonempty)
##    resp_id q9_other_text
## 1       23 None
## 2       26 Haven't really paid attention, I just get what I need and get out
## 3       29 No Raspberry
## 4       31 unfamiliar with any of these changes
## 5       35 none
## 6       40 None
## 7       42 none
## 8       51 Cup utilization by employees and company (The non-PC stuff, like bears or racism)
## 9       79 The cuts to the menu
## 10      87 none
## 11      94 Didn't notice.
## 12      96 that the ceo is a union buster
# ------------------------------------------------------------
# Q10 – Awareness of 'Hello Again'
# ------------------------------------------------------------
q10_hello_freq <- coffee_clean %>%
  count(q10_hello_again) %>%
  mutate(prop = n / sum(n))
print(q10_hello_freq)
## q10_hello_again n prop
## 1 No 72 0.7128713
## 2 Yes 29 0.2871287
# ------------------------------------------------------------
# Q11 – Gender
# ------------------------------------------------------------
q11_gender_freq <- coffee_clean %>%
  count(q11_gender) %>%
  mutate(prop = n / sum(n))
print(q11_gender_freq)
## q11_gender n prop
## 1 Female 56 0.55445545
## 2 Male 44 0.43564356
## 3 Prefer not to say 1 0.00990099
# ------------------------------------------------------------
# Q12 – Age
# ------------------------------------------------------------
q12_age_summary <- coffee_clean %>%
  summarize(
    mean_age = mean(q12_age_num, na.rm = TRUE),
    sd_age   = sd(q12_age_num, na.rm = TRUE),
    min_age  = min(q12_age_num, na.rm = TRUE),
    max_age  = max(q12_age_num, na.rm = TRUE)
  )
print(q12_age_summary)
## mean_age sd_age min_age max_age
## 1 40.86 11.59765 21 74