R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(readxl)    #for reading Excel files
library(tidyverse) #for data manipulation
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)   #
library(dplyr)
library(janitor)   #for cleaning column names
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(skimr)     #for summarizing data
library(psych)     #for Descriptive Analysis
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

Setting Working Environment

# Point the R session at the project folder.
# NOTE(review): machine-specific absolute path; the dataset is also loaded
# through an absolute path, so this call looks redundant - confirm before
# removing it.
setwd("D:\\School of Statistician")

Loading Dataset

# Import the survey responses from the Excel workbook.
Coffee_Survey <- readxl::read_excel(
  path = "D:\\School of Statistician\\Coffee_Shop_Survey.xlsx"
)

View the first few rows

# Preview the first six rows to confirm the import worked.
head(Coffee_Survey)
## # A tibble: 6 × 9
##   Customer_ID   Age Gender Visit_Frequency Favorite_Product Satisfaction_Score
##   <chr>       <dbl> <chr>            <dbl> <chr>                         <dbl>
## 1 CUST001        56 Male                 1 Sandwich                          2
## 2 CUST002        46 Male                 2 Sandwich                          1
## 3 CUST003        32 Male                 6 Pastry                            5
## 4 CUST004        60 Female               2 Pastry                            4
## 5 CUST005        25 Female               3 Tea                               2
## 6 CUST006        38 Female               7 Pastry                            1
## # ℹ 3 more variables: `Time_Spent (min)` <dbl>, Loyalty_Member <chr>,
## #   Would_Recommend <chr>

View the last few rows

# Preview the last six rows of the dataset.
tail(Coffee_Survey)
## # A tibble: 6 × 9
##   Customer_ID   Age Gender Visit_Frequency Favorite_Product Satisfaction_Score
##   <chr>       <dbl> <chr>            <dbl> <chr>                         <dbl>
## 1 CUST095        42 Male                 7 Pastry                            2
## 2 CUST096        24 Female               6 Coffee                            1
## 3 CUST097        26 Female               2 Tea                               5
## 4 CUST098        41 Female               0 Pastry                            5
## 5 CUST099        18 Male                 7 Tea                               4
## 6 CUST100        61 Male                 0 Pastry                            2
## # ℹ 3 more variables: `Time_Spent (min)` <dbl>, Loyalty_Member <chr>,
## #   Would_Recommend <chr>

Check Structure

# Inspect the dimensions, column names and column types.
str(Coffee_Survey)
## tibble [100 × 9] (S3: tbl_df/tbl/data.frame)
##  $ Customer_ID       : chr [1:100] "CUST001" "CUST002" "CUST003" "CUST004" ...
##  $ Age               : num [1:100] 56 46 32 60 25 38 56 36 40 28 ...
##  $ Gender            : chr [1:100] "Male" "Male" "Male" "Female" ...
##  $ Visit_Frequency   : num [1:100] 1 2 6 2 3 7 6 3 0 0 ...
##  $ Favorite_Product  : chr [1:100] "Sandwich" "Sandwich" "Pastry" "Pastry" ...
##  $ Satisfaction_Score: num [1:100] 2 1 5 4 2 1 4 5 4 1 ...
##  $ Time_Spent (min)  : num [1:100] 53 32 36 31 24 28 16 54 39 37 ...
##  $ Loyalty_Member    : chr [1:100] "Yes" "No" "Yes" "Yes" ...
##  $ Would_Recommend   : chr [1:100] "No" "Yes" "Yes" "Yes" ...

Clean column names to be safe

# Standardise the column names to snake_case
# (e.g. `Time_Spent (min)` becomes `time_spent_min`).
Coffee_Survey <- janitor::clean_names(Coffee_Survey)

#Quick Summary

# Per-column overview: missing values, unique counts and summary statistics.
skim(Coffee_Survey)
Data summary
Name Coffee_Survey
Number of rows 100
Number of columns 9
_______________________
Column type frequency:
character 5
numeric 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
customer_id 0 1 7 7 0 100 0
gender 0 1 4 6 0 3 0
favorite_product 0 1 3 8 0 4 0
loyalty_member 0 1 2 3 0 2 0
would_recommend 0 1 2 3 0 2 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age 0 1 40.88 13.99 18 30.50 41.0 53.25 64 ▇▆▇▃▇
visit_frequency 0 1 3.51 2.61 0 1.00 3.0 6.00 7 ▇▃▅▂▇
satisfaction_score 0 1 3.16 1.46 1 2.00 3.0 4.25 5 ▅▇▃▇▇
time_spent_min 0 1 33.36 14.63 5 21.75 33.5 45.00 60 ▆▅▇▇▆

Descriptive Analysis

Average Age of Customers Visiting the Coffee Shop

# Average (mean) age of the respondents.
# na.rm = TRUE means: ignore missing (NA) values, if any.
mean(Coffee_Survey$age, na.rm = TRUE)
## [1] 40.88

What is the Gender Distribution Among respondents

# Tally respondents per gender and add each tally's share of the total.
mutate(
  count(Coffee_Survey, gender),
  percentage = n / sum(n) * 100
)
## # A tibble: 3 × 3
##   gender     n percentage
##   <chr>  <int>      <dbl>
## 1 Female    43         43
## 2 Male      49         49
## 3 Other      8          8
# Shows how many males, females, and others there are
# Percentage shows the percentage of each gender group

RELATIONSHIP & COMPARISON

Does visit frequency differ by gender or age group

# Visit frequency by gender: count the customers in every
# gender / visit-frequency pair, most common frequency first within a gender.

Coffee_Survey %>%   # Start with the cleaned survey data
  group_by(gender, visit_frequency) %>%
  summarise(count =n()) %>%    # n() is the number of rows (customers) in each group
  arrange(gender, desc(count))
## `summarise()` has grouped output by 'gender'. You can override using the
## `.groups` argument.
## # A tibble: 21 × 3
## # Groups:   gender [3]
##    gender visit_frequency count
##    <chr>            <dbl> <int>
##  1 Female               0     8
##  2 Female               3     8
##  3 Female               2     7
##  4 Female               7     7
##  5 Female               1     4
##  6 Female               4     3
##  7 Female               5     3
##  8 Female               6     3
##  9 Male                 7    13
## 10 Male                 0     9
## # ℹ 11 more rows

Visit Frequency by Age Group

# Bucket every customer into an age band.
# case_when() keeps the first matching condition, so each upper bound
# implicitly carries the previous band's lower bound.
Coffee_Survey <- mutate(
  Coffee_Survey,
  age_group = case_when(
    age < 20 ~ "Under 20",
    age < 30 ~ "20-29",
    age < 40 ~ "30-39",
    age < 50 ~ "40-49",
    age >= 50 ~ "50+",
    .default = "Unknown" # no condition matched, i.e. age is missing
  )
)

# Visit frequency by age group: count the customers in every
# age-group / visit-frequency pair, most common frequency first within a group.
# age_group is a character column, so the groups sort as text rather than by
# age.
Coffee_Survey %>%
  group_by(age_group, visit_frequency) %>%
  summarise(count =n()) %>%
  arrange(age_group, desc(count))
## `summarise()` has grouped output by 'age_group'. You can override using the
## `.groups` argument.
## # A tibble: 34 × 3
## # Groups:   age_group [5]
##    age_group visit_frequency count
##    <chr>               <dbl> <int>
##  1 20-29                   0     4
##  2 20-29                   2     4
##  3 20-29                   5     4
##  4 20-29                   3     3
##  5 20-29                   7     2
##  6 20-29                   4     1
##  7 20-29                   6     1
##  8 30-39                   3     6
##  9 30-39                   7     6
## 10 30-39                   2     3
## # ℹ 24 more rows
# Shows how often different age groups visit

Is there a relationship between Visit frequency and satisfaction score

# Boxplot: Visit Frequency vs Satisfaction Score
library(ggplot2)

# visit_frequency is numeric, so it is wrapped in factor() to draw one box per
# visit-frequency value; a continuous x would collapse all observations into a
# single box.
ggplot(
  Coffee_Survey,
  aes(x = factor(visit_frequency), y = satisfaction_score)
) +
  geom_boxplot(fill = "green") +
  labs(title = "Visit Frequency Vs Satisfaction Score",
       x = "Visit Frequency",
       y = "Satisfaction Score") +
  theme_minimal()

# Each box shows the range of satisfaction scores for each visit frequency category.
# You can visually see if frequent visitors are more or less satisfied.

Relationship between visit frequency and satisfaction score (Spearman correlation)

# Spearman rank correlation between visit frequency and satisfaction.
# Both variables contain tied values, for which an exact p-value cannot be
# computed, so the asymptotic p-value is requested explicitly.
cor.test(Coffee_Survey$visit_frequency, Coffee_Survey$satisfaction_score,
         method = "spearman", exact = FALSE)
## 
##  Spearman's rank correlation rho
## 
## data:  Coffee_Survey$visit_frequency and Coffee_Survey$satisfaction_score
## S = 168487, p-value = 0.9133
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.01102338

Using Scatter Plot

# Scatter plot of satisfaction score against visit frequency with a fitted
# linear trend line.
# theme_minimal() has to be joined to the plot with `+`; written as a
# standalone statement it does not style the plot and only prints the theme
# object.
ggplot(Coffee_Survey, aes(x = visit_frequency, y = satisfaction_score)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", color = "blue") +
  ggtitle("Scatter Plot: Visit_Frequency vs Satisfaction_Score") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

#Do loyalty members spend more time at the coffee shop than non-members

# Mean and median time spent, plus group size, for loyalty members vs
# non-members.
summarise(
  group_by(Coffee_Survey, loyalty_member),
  average_time_spent_min = mean(time_spent_min, na.rm = TRUE),
  median_time_spent_min = median(time_spent_min, na.rm = TRUE),
  count = n()
)
## # A tibble: 2 × 4
##   loyalty_member average_time_spent_min median_time_spent_min count
##   <chr>                           <dbl>                 <dbl> <int>
## 1 No                               30.5                    29    45
## 2 Yes                              35.7                    37    55
# This gives the average and median time spent for loyalty members vs non-members.

Visualize it with a boxplot

# BoxPlot: Time Spent Vs Loyalty Member

# One box per loyalty status showing the distribution of time spent.
ggplot(
  data = Coffee_Survey,
  mapping = aes(
    x = loyalty_member,
    y = time_spent_min,
    fill = loyalty_member
  )
) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    title = "Time Spent at Coffee Shop by Loyalty Membership",
    x = "Loyalty Member",
    y = "Time Spent (minutes)"
  )

# This shows the distribution of time spent for loyalty vs non-loyalty customers.

Are customers who would recommend the coffee shop more likely to be loyal members?

# Compare recommendation against loyalty membership with a simple cross-tab.
# Rows are loyalty_member (first argument), columns are would_recommend
# (second argument); each cell is a customer count.

table(Coffee_Survey$loyalty_member, Coffee_Survey$would_recommend)
##      
##       No Yes
##   No   7  38
##   Yes 20  35
# This shows how many loyalty members and non-members would or wouldn't recommend the coffee shop.

Enhancing Percentage Table

# Row percentages: within each loyalty group, the share of customers who
# would / would not recommend the shop.
Coffee_Survey %>%
  group_by(loyalty_member, would_recommend) %>%
  summarise(count =n()) %>%
  group_by(loyalty_member) %>%   # sum(count) below is taken within each loyalty group
  mutate(percent = round(count / sum(count) * 100, 1))
## `summarise()` has grouped output by 'loyalty_member'. You can override using
## the `.groups` argument.
## # A tibble: 4 × 4
## # Groups:   loyalty_member [2]
##   loyalty_member would_recommend count percent
##   <chr>          <chr>           <int>   <dbl>
## 1 No             No                  7    15.6
## 2 No             Yes                38    84.4
## 3 Yes            No                 20    36.4
## 4 Yes            Yes                35    63.6
# this shows for each loyalty group (Yes or No) what percentage would recommend.

Are customers who would recommend the coffee shop more likely to be loyal members?

#customers who would recommend the coffee shop more likely to be loyal members?

# Count customers for every recommendation / loyalty combination, skipping
# rows where either answer is missing.
# .groups = "drop" makes summarise() return an ungrouped tibble directly, so
# no separate ungroup() step is needed and no grouping message is emitted.
Recommend_Loyalty <- Coffee_Survey %>%
  filter(!is.na(would_recommend), !is.na(loyalty_member)) %>%
  group_by(would_recommend, loyalty_member) %>%
  summarise(count = n(), .groups = "drop")
# Side-by-side bars: customer counts per loyalty status, split by whether they
# would recommend the shop.
ggplot(
  data = Recommend_Loyalty,
  mapping = aes(x = loyalty_member, y = count, fill = would_recommend)
) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Recommendation vs Loyalty Membership",
    x = "Loyalty Member Status",
    y = "Number of Customers"
  )

INSIGHTFUL / ADVANCED QUESTIONS

# What factors (age, loyalty status, product preference) are associated with higher satisfaction scores?

# Correlation matrix of age and satisfaction score, using only rows where
# both values are present.
cor(
  Coffee_Survey[c("age", "satisfaction_score")],
  use = "complete.obs"
)
##                            age satisfaction_score
## age                 1.00000000        -0.01436614
## satisfaction_score -0.01436614         1.00000000
# Box plot of satisfaction scores for loyalty members vs non-members.
ggplot(
  data = Coffee_Survey,
  mapping = aes(
    x = loyalty_member,
    y = satisfaction_score,
    fill = loyalty_member
  )
) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Dark2") + # other palettes to try: "Set2", "Pastel1"
  labs(title = "Satisfaction Score by Loyalty Status") +
  theme_minimal()

# Box plot of satisfaction scores for each favourite product.
ggplot(
  data = Coffee_Survey,
  mapping = aes(
    x = favorite_product,
    y = satisfaction_score,
    fill = favorite_product
  )
) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Dark2") +
  labs(title = "Satisfaction Score by Product Preference") +
  theme_minimal()

Is there a difference in satisfaction scores between customers who prefer coffee and those who prefer pastries?

# Keep only customers whose favourite product is Coffee or Pastry.
# %in% never returns NA, so rows with a missing favourite product are already
# excluded by this single condition.
Filtered_Data <- filter(
  Coffee_Survey,
  favorite_product %in% c("Coffee", "Pastry")
)

# Group sizes after filtering.
table(Filtered_Data$favorite_product)
## 
## Coffee Pastry 
##     24     25
# T-test: compare mean satisfaction between the Coffee and Pastry groups.
# Called without var.equal, t.test() runs the Welch (unequal-variance) version.
t.test(satisfaction_score ~ favorite_product, data = Filtered_Data)
## 
##  Welch Two Sample t-test
## 
## data:  satisfaction_score by favorite_product
## t = -1.183, df = 46.929, p-value = 0.2427
## alternative hypothesis: true difference in means between group Coffee and group Pastry is not equal to 0
## 95 percent confidence interval:
##  -1.3232661  0.3432661
## sample estimates:
## mean in group Coffee mean in group Pastry 
##                 2.75                 3.24
# Box plot of satisfaction scores for the two compared products.
ggplot(
  data = Filtered_Data,
  mapping = aes(
    x = favorite_product,
    y = satisfaction_score,
    fill = favorite_product
  )
) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Dark2") +
  labs(title = "Satisfaction by Product Preference (Coffee vs Pastry)") +
  theme_minimal()

Can we predict whether a customer would recommend the coffee shop based on other variables?

# Encode the outcome as a factor. Levels are sorted alphabetically
# ("No", "Yes") and glm() treats the first level as failure, so the model
# estimates the probability of "Yes".
Coffee_Survey$would_recommend <- as.factor(Coffee_Survey$would_recommend)

# Build a logistic regression model of recommendation on the other survey
# variables.
recommend_model <- glm(
  would_recommend ~ age + gender + visit_frequency + favorite_product +
    time_spent_min + loyalty_member,
  data = Coffee_Survey,
  family = binomial()
)

# Add the predicted probabilities from the model to the dataset.
# Passing newdata guarantees exactly one prediction per row (NA where a
# predictor is missing); without it, rows dropped during fitting would make
# the result shorter than the data frame and the assignment would fail.
Coffee_Survey$predicted_prob <- predict(
  recommend_model,
  newdata = Coffee_Survey,
  type = "response"
)
# Reshape the predictors into long format (one row per customer and variable)
# alongside the predicted probability.
# Numeric predictors are binned or converted to text first so that all
# predictor values can share a single column.
coffee_long <- Coffee_Survey %>%
  select(
    age, time_spent_min, visit_frequency, gender, favorite_product,
    loyalty_member, predicted_prob
  ) %>%
  mutate(
    age = cut(age, breaks = 5),
    time_spent_min = cut(time_spent_min, breaks = 5),
    visit_frequency = as.character(visit_frequency),
    gender = as.character(gender),
    favorite_product = as.character(favorite_product),
    loyalty_member = as.character(loyalty_member)
  ) %>%
  pivot_longer(
    cols = !predicted_prob,
    names_to = "variable",
    values_to = "value"
  )

# Distribution of predicted recommendation probability for every
# variable / value combination, coloured by variable.
ggplot(
  data = coffee_long,
  mapping = aes(
    x = interaction(variable, value),
    y = predicted_prob,
    fill = variable
  )
) +
  geom_boxplot() +
  labs(
    title = "Predicted Probability of Recommendation",
    x = "Variable and Value",
    y = "Predicted Probability"
  ) +
  # theme_minimal() must come first: a complete theme added later would reset
  # the axis-text rotation below.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

TIME-SPECIFIC

# Bar chart: number of customers at each visit-frequency value.
ggplot(data = Coffee_Survey, mapping = aes(x = visit_frequency)) +
  geom_bar(fill = "cyan") +
  theme_minimal() +
  labs(
    title = "Visit Frequency Distribution",
    x = "Visit Frequency",
    y = "Number of Customers"
  )

#Is there any seasonality in satisfaction levels?

# Example of what a seasonality check would look like with a 'Visit_Date'.
# No month is taken from the survey itself here: one is drawn at random for
# each respondent purely for illustration, so the plot says nothing about
# real seasonality.
set.seed(123)  # For reproducibility
coffee <- mutate(
  Coffee_Survey,
  Month = factor(
    sample(month.abb, n(), replace = TRUE),
    levels = month.abb
  )
)

# Satisfaction score distribution for each (simulated) month.
ggplot(data = coffee, mapping = aes(x = Month, y = satisfaction_score)) +
  geom_boxplot(fill = "red") +
  theme_minimal() +
  labs(
    title = "Satisfaction Levels Across Months (Simulated)",
    x = "Month",
    y = "Satisfaction Score"
  )