This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(readxl) #for reading Excel files
library(tidyverse) #for data manipulation
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2) #
library(dplyr)
library(janitor) #for cleaning column names
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(skimr) #for summarizing data
library(psych) #for Descriptive Analysis
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
setwd("D:\\School of Statistician")
Coffee_Survey <- read_excel("D:\\School of Statistician\\Coffee_Shop_Survey.xlsx")
head(Coffee_Survey)
## # A tibble: 6 × 9
## Customer_ID Age Gender Visit_Frequency Favorite_Product Satisfaction_Score
## <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 CUST001 56 Male 1 Sandwich 2
## 2 CUST002 46 Male 2 Sandwich 1
## 3 CUST003 32 Male 6 Pastry 5
## 4 CUST004 60 Female 2 Pastry 4
## 5 CUST005 25 Female 3 Tea 2
## 6 CUST006 38 Female 7 Pastry 1
## # ℹ 3 more variables: `Time_Spent (min)` <dbl>, Loyalty_Member <chr>,
## # Would_Recommend <chr>
tail(Coffee_Survey)
## # A tibble: 6 × 9
## Customer_ID Age Gender Visit_Frequency Favorite_Product Satisfaction_Score
## <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 CUST095 42 Male 7 Pastry 2
## 2 CUST096 24 Female 6 Coffee 1
## 3 CUST097 26 Female 2 Tea 5
## 4 CUST098 41 Female 0 Pastry 5
## 5 CUST099 18 Male 7 Tea 4
## 6 CUST100 61 Male 0 Pastry 2
## # ℹ 3 more variables: `Time_Spent (min)` <dbl>, Loyalty_Member <chr>,
## # Would_Recommend <chr>
str(Coffee_Survey)
## tibble [100 × 9] (S3: tbl_df/tbl/data.frame)
## $ Customer_ID : chr [1:100] "CUST001" "CUST002" "CUST003" "CUST004" ...
## $ Age : num [1:100] 56 46 32 60 25 38 56 36 40 28 ...
## $ Gender : chr [1:100] "Male" "Male" "Male" "Female" ...
## $ Visit_Frequency : num [1:100] 1 2 6 2 3 7 6 3 0 0 ...
## $ Favorite_Product : chr [1:100] "Sandwich" "Sandwich" "Pastry" "Pastry" ...
## $ Satisfaction_Score: num [1:100] 2 1 5 4 2 1 4 5 4 1 ...
## $ Time_Spent (min) : num [1:100] 53 32 36 31 24 28 16 54 39 37 ...
## $ Loyalty_Member : chr [1:100] "Yes" "No" "Yes" "Yes" ...
## $ Would_Recommend : chr [1:100] "No" "Yes" "Yes" "Yes" ...
Coffee_Survey <- Coffee_Survey %>% clean_names()
#Quick Summary
skim(Coffee_Survey)
| Name | Coffee_Survey |
| Number of rows | 100 |
| Number of columns | 9 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| numeric | 4 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| customer_id | 0 | 1 | 7 | 7 | 0 | 100 | 0 |
| gender | 0 | 1 | 4 | 6 | 0 | 3 | 0 |
| favorite_product | 0 | 1 | 3 | 8 | 0 | 4 | 0 |
| loyalty_member | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| would_recommend | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age | 0 | 1 | 40.88 | 13.99 | 18 | 30.50 | 41.0 | 53.25 | 64 | ▇▆▇▃▇ |
| visit_frequency | 0 | 1 | 3.51 | 2.61 | 0 | 1.00 | 3.0 | 6.00 | 7 | ▇▃▅▂▇ |
| satisfaction_score | 0 | 1 | 3.16 | 1.46 | 1 | 2.00 | 3.0 | 4.25 | 5 | ▅▇▃▇▇ |
| time_spent_min | 0 | 1 | 33.36 | 14.63 | 5 | 21.75 | 33.5 | 45.00 | 60 | ▆▅▇▇▆ |
mean(Coffee_Survey$age, na.rm = TRUE)
## [1] 40.88
# na.rm =TRUE means: ignore missing (NA) values of any
# This will print the average(mean) age.
Coffee_Survey %>%
count(gender) %>%
mutate(percentage = n/ sum(n) * 100)
## # A tibble: 3 × 3
## gender n percentage
## <chr> <int> <dbl>
## 1 Female 43 43
## 2 Male 49 49
## 3 Other 8 8
# Shows how many males, females, and others there are
# Percentage shows the percentage of each gender group
# Most and least popular products
Coffee_Survey %>%
count(favorite_product, sort = TRUE)
## # A tibble: 4 × 2
## favorite_product n
## <chr> <int>
## 1 Tea 27
## 2 Pastry 25
## 3 Coffee 24
## 4 Sandwich 24
# Counts how many people prefer each product
# Top will show the most popular
# The bottom will show the least popular
# Visit Frequency by Gender
Coffee_Survey %>% # Start with the
group_by(gender, visit_frequency) %>%
summarise(count =n()) %>% # count how many rows(Customers) are in that group. n is the number of rows
arrange(gender, desc(count))
## `summarise()` has grouped output by 'gender'. You can override using the
## `.groups` argument.
## # A tibble: 21 × 3
## # Groups: gender [3]
## gender visit_frequency count
## <chr> <dbl> <int>
## 1 Female 0 8
## 2 Female 3 8
## 3 Female 2 7
## 4 Female 7 7
## 5 Female 1 4
## 6 Female 4 3
## 7 Female 5 3
## 8 Female 6 3
## 9 Male 7 13
## 10 Male 0 9
## # ℹ 11 more rows
# Create Age Groups
Coffee_Survey <- Coffee_Survey %>%
mutate(age_group = case_when( # mutate= Create or modify columns
age < 20 ~ "Under 20",
age >= 20 & age < 30 ~ "20-29",
age >= 30 & age < 40 ~ "30-39",
age >= 40 & age < 50 ~ "40-49",
age >= 50 ~ "50+",
TRUE ~ "Unknown" # if none of the above apply, label as Unknown.
))
# Visit Frequency by Age Group
Coffee_Survey %>%
group_by(age_group, visit_frequency) %>%
summarise(count =n()) %>%
arrange(age_group, desc(count))
## `summarise()` has grouped output by 'age_group'. You can override using the
## `.groups` argument.
## # A tibble: 34 × 3
## # Groups: age_group [5]
## age_group visit_frequency count
## <chr> <dbl> <int>
## 1 20-29 0 4
## 2 20-29 2 4
## 3 20-29 5 4
## 4 20-29 3 3
## 5 20-29 7 2
## 6 20-29 4 1
## 7 20-29 6 1
## 8 30-39 3 6
## 9 30-39 7 6
## 10 30-39 2 3
## # ℹ 24 more rows
# Shows how often different age groups visit
# Boxplot: Visit Frequency vs Satisfaction Score
library(ggplot2)
ggplot(Coffee_Survey, aes(x = visit_frequency, y = satisfaction_score)) +
geom_boxplot(fill = "green") +
labs(title = "Visit Frequency Vs Satisfaction Score",
x = "Visit Frequency",
y = "Satisfaction Score") +
theme_minimal()
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
# Each box shows the range of satisfaction scores for each visit frequency category.
# You can visually see if frequent visitors are more or less satisfied.
cor.test(Coffee_Survey$visit_frequency, Coffee_Survey$satisfaction_score, method = "spearman")
## Warning in cor.test.default(Coffee_Survey$visit_frequency,
## Coffee_Survey$satisfaction_score, : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: Coffee_Survey$visit_frequency and Coffee_Survey$satisfaction_score
## S = 168487, p-value = 0.9133
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.01102338
ggplot(Coffee_Survey, aes(x = visit_frequency, y = satisfaction_score)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "blue") +
ggtitle("Scatter Plot: Visit_Frequency vs Satisfaction_Score")
## `geom_smooth()` using formula = 'y ~ x'
theme_minimal()
## List of 136
## $ line :List of 6
## ..$ colour : chr "black"
## ..$ linewidth : num 0.5
## ..$ linetype : num 1
## ..$ lineend : chr "butt"
## ..$ arrow : logi FALSE
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_line" "element"
## $ rect :List of 5
## ..$ fill : chr "white"
## ..$ colour : chr "black"
## ..$ linewidth : num 0.5
## ..$ linetype : num 1
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ text :List of 11
## ..$ family : chr ""
## ..$ face : chr "plain"
## ..$ colour : chr "black"
## ..$ size : num 11
## ..$ hjust : num 0.5
## ..$ vjust : num 0.5
## ..$ angle : num 0
## ..$ lineheight : num 0.9
## ..$ margin : 'margin' num [1:4] 0points 0points 0points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : logi FALSE
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ title : NULL
## $ aspect.ratio : NULL
## $ axis.title : NULL
## $ axis.title.x :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 1
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 2.75points 0points 0points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.title.x.top :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 0
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 0points 2.75points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.title.x.bottom : NULL
## $ axis.title.y :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 1
## ..$ angle : num 90
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 2.75points 0points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.title.y.left : NULL
## $ axis.title.y.right :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 1
## ..$ angle : num -90
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 0points 0points 2.75points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : chr "grey30"
## ..$ size : 'rel' num 0.8
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.x :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 1
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 2.2points 0points 0points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.x.top :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 0
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 0points 2.2points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.x.bottom : NULL
## $ axis.text.y :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 1
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 2.2points 0points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.y.left : NULL
## $ axis.text.y.right :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 0
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 0points 0points 2.2points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.theta : NULL
## $ axis.text.r :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 0.5
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 2.2points 0points 2.2points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.ticks : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ axis.ticks.x : NULL
## $ axis.ticks.x.top : NULL
## $ axis.ticks.x.bottom : NULL
## $ axis.ticks.y : NULL
## $ axis.ticks.y.left : NULL
## $ axis.ticks.y.right : NULL
## $ axis.ticks.theta : NULL
## $ axis.ticks.r : NULL
## $ axis.minor.ticks.x.top : NULL
## $ axis.minor.ticks.x.bottom : NULL
## $ axis.minor.ticks.y.left : NULL
## $ axis.minor.ticks.y.right : NULL
## $ axis.minor.ticks.theta : NULL
## $ axis.minor.ticks.r : NULL
## $ axis.ticks.length : 'simpleUnit' num 2.75points
## ..- attr(*, "unit")= int 8
## $ axis.ticks.length.x : NULL
## $ axis.ticks.length.x.top : NULL
## $ axis.ticks.length.x.bottom : NULL
## $ axis.ticks.length.y : NULL
## $ axis.ticks.length.y.left : NULL
## $ axis.ticks.length.y.right : NULL
## $ axis.ticks.length.theta : NULL
## $ axis.ticks.length.r : NULL
## $ axis.minor.ticks.length : 'rel' num 0.75
## $ axis.minor.ticks.length.x : NULL
## $ axis.minor.ticks.length.x.top : NULL
## $ axis.minor.ticks.length.x.bottom: NULL
## $ axis.minor.ticks.length.y : NULL
## $ axis.minor.ticks.length.y.left : NULL
## $ axis.minor.ticks.length.y.right : NULL
## $ axis.minor.ticks.length.theta : NULL
## $ axis.minor.ticks.length.r : NULL
## $ axis.line : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ axis.line.x : NULL
## $ axis.line.x.top : NULL
## $ axis.line.x.bottom : NULL
## $ axis.line.y : NULL
## $ axis.line.y.left : NULL
## $ axis.line.y.right : NULL
## $ axis.line.theta : NULL
## $ axis.line.r : NULL
## $ legend.background : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ legend.margin : 'margin' num [1:4] 5.5points 5.5points 5.5points 5.5points
## ..- attr(*, "unit")= int 8
## $ legend.spacing : 'simpleUnit' num 11points
## ..- attr(*, "unit")= int 8
## $ legend.spacing.x : NULL
## $ legend.spacing.y : NULL
## $ legend.key : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ legend.key.size : 'simpleUnit' num 1.2lines
## ..- attr(*, "unit")= int 3
## $ legend.key.height : NULL
## $ legend.key.width : NULL
## $ legend.key.spacing : 'simpleUnit' num 5.5points
## ..- attr(*, "unit")= int 8
## $ legend.key.spacing.x : NULL
## $ legend.key.spacing.y : NULL
## $ legend.frame : NULL
## $ legend.ticks : NULL
## $ legend.ticks.length : 'rel' num 0.2
## $ legend.axis.line : NULL
## $ legend.text :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : 'rel' num 0.8
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ legend.text.position : NULL
## $ legend.title :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 0
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ legend.title.position : NULL
## $ legend.position : chr "right"
## $ legend.position.inside : NULL
## $ legend.direction : NULL
## $ legend.byrow : NULL
## $ legend.justification : chr "center"
## $ legend.justification.top : NULL
## $ legend.justification.bottom : NULL
## $ legend.justification.left : NULL
## $ legend.justification.right : NULL
## $ legend.justification.inside : NULL
## $ legend.location : NULL
## $ legend.box : NULL
## $ legend.box.just : NULL
## $ legend.box.margin : 'margin' num [1:4] 0cm 0cm 0cm 0cm
## ..- attr(*, "unit")= int 1
## $ legend.box.background : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ legend.box.spacing : 'simpleUnit' num 11points
## ..- attr(*, "unit")= int 8
## [list output truncated]
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi TRUE
## - attr(*, "validate")= logi TRUE
#Do loyalty members spend more time at the coffee shop than non-members
# Average time spent by loyalty status
Coffee_Survey %>%
group_by(loyalty_member) %>%
summarise(
average_time_spent_min = mean(time_spent_min, na.rm = TRUE),
median_time_spent_min = median(time_spent_min, na.rm = TRUE),
count = n()
)
## # A tibble: 2 × 4
## loyalty_member average_time_spent_min median_time_spent_min count
## <chr> <dbl> <dbl> <int>
## 1 No 30.5 29 45
## 2 Yes 35.7 37 55
# this gives the average and median time spent for loyalty members vs non
# BoxPlot: Time Spent Vs Loyalty Member
ggplot(Coffee_Survey, aes(x = loyalty_member, y = time_spent_min, fill = loyalty_member)) +
geom_boxplot() +
labs(title = "Time Spent at Coffee Shop by Loyalty Membership",
x = "Loyalty Member",
y = "Time Spent (minutes)") +
theme_minimal()
# This shows the distribution of time spent for loyalty vs non-loyalty customers.
# We compare recommendations vs loyalty membership
# Create a simple cross-tab
# Cross-tab of loyalty membership and recommendation
table(Coffee_Survey$loyalty_member, Coffee_Survey$would_recommend)
##
## No Yes
## No 7 38
## Yes 20 35
# This shows how may loyalty members and non-members would or wouldn't recommend the coffee shop.
Coffee_Survey %>%
group_by(loyalty_member, would_recommend) %>%
summarise(count =n()) %>%
group_by(loyalty_member) %>%
mutate(percent = round(count / sum(count) * 100, 1))
## `summarise()` has grouped output by 'loyalty_member'. You can override using
## the `.groups` argument.
## # A tibble: 4 × 4
## # Groups: loyalty_member [2]
## loyalty_member would_recommend count percent
## <chr> <chr> <int> <dbl>
## 1 No No 7 15.6
## 2 No Yes 38 84.4
## 3 Yes No 20 36.4
## 4 Yes Yes 35 63.6
# this shows for each loyalty group (Yes or No) what percentage would recommend.
#customers who would recommend the coffee shop more likely to be loyal members?
Recommend_Loyalty <- Coffee_Survey %>%
filter(!is.na(would_recommend), !is.na(loyalty_member)) %>%
group_by(would_recommend, loyalty_member) %>%
summarise(count = n()) %>%
ungroup()
## `summarise()` has grouped output by 'would_recommend'. You can override using
## the `.groups` argument.
ggplot(Recommend_Loyalty, aes(x = loyalty_member, y = count, fill = would_recommend)) +
geom_col(position = "dodge") +
labs(
title = "Recommendation vs Loyalty Membership",
x = "Loyalty Member Status",
y = "Number of Customers"
) +
theme_minimal()
# What factors (age, loyalty status, product preference) are associated with higher satisfaction scores?
Coffee_Survey %>%
select(age, satisfaction_score) %>%
cor(use = "complete.obs")
## age satisfaction_score
## age 1.00000000 -0.01436614
## satisfaction_score -0.01436614 1.00000000
# Visual: Box plots
ggplot(Coffee_Survey, aes(x = loyalty_member, y = satisfaction_score, fill = loyalty_member)) +
geom_boxplot() +
theme_minimal() +
labs(title = "Satisfaction Score by Loyalty Status")+
scale_fill_brewer(palette = "Dark2") # Try "Set2", "Pastel1", "Dark2",
ggplot(Coffee_Survey, aes(x = favorite_product, y = satisfaction_score, fill = favorite_product)) +
geom_boxplot() +
theme_minimal() +
labs(title = "Satisfaction Score by Product Preference") +
scale_fill_brewer(palette = "Dark2")
# Filter only Coffee and Pastries
Filtered_Data <- Coffee_Survey %>% filter(favorite_product %in% c("Coffee", "Pastry")) %>%
filter(!is.na(favorite_product)) # remove NAs
table(Filtered_Data$favorite_product)
##
## Coffee Pastry
## 24 25
# T-test
t.test(satisfaction_score ~ favorite_product, data = Filtered_Data)
##
## Welch Two Sample t-test
##
## data: satisfaction_score by favorite_product
## t = -1.183, df = 46.929, p-value = 0.2427
## alternative hypothesis: true difference in means between group Coffee and group Pastry is not equal to 0
## 95 percent confidence interval:
## -1.3232661 0.3432661
## sample estimates:
## mean in group Coffee mean in group Pastry
## 2.75 3.24
# Visual
ggplot(Filtered_Data, aes(x = favorite_product, y = satisfaction_score, fill = favorite_product)) +
geom_boxplot() +
theme_minimal() +
labs(title = "Satisfaction by Product Preference (Coffee vs Pastry)")+
scale_fill_brewer(palette = "Dark2")
Coffee_Survey$would_recommend <- as.factor(Coffee_Survey$would_recommend)
#Build a model
recommend_model <- glm(would_recommend ~ age + gender + visit_frequency +
favorite_product + `time_spent_min` + loyalty_member,
data = Coffee_Survey, family = "binomial")
# Add predicted varibles from the model dataset
Coffee_Survey$predicted_prob <-predict(recommend_model, type = "response")
# convert numerical to characters for consistency
coffee_long <- Coffee_Survey %>%
select(age, `time_spent_min`, visit_frequency, gender, favorite_product, loyalty_member,
predicted_prob) %>%
mutate(age = cut(age, breaks = 5),
`time_spent_min` = cut(`time_spent_min`, breaks = 5),
visit_frequency = as.character(visit_frequency),
gender = as.character(gender),
favorite_product = as.character(favorite_product),
loyalty_member = as.character(loyalty_member)) %>%
pivot_longer(
cols = -predicted_prob,
names_to = "variable",
values_to = "value"
)
ggplot(coffee_long, aes(x = interaction(variable, value), y = predicted_prob, fill = variable)) +
geom_boxplot() +
labs(title = "Predicted Probability of Recommendation",
x = "Variable and Value",
y = "Predicted Probability") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
ggplot(Coffee_Survey, aes(x = visit_frequency)) +
geom_bar(fill = "cyan") +
labs(
title = "Visit Frequency Distribution",
x = "Visit Frequency",
y = "Number of Customers"
) +
theme_minimal()
#Is there any seasonality in satisfaction levels?
# Example if you had 'Visit_Date'
set.seed(123) # For reproducibility
coffee <- Coffee_Survey %>%
mutate(Month = factor(sample(month.abb, n(), replace = TRUE), levels = month.abb))
# Now plot Satisfaction Score across Months
ggplot(coffee, aes(x = Month, y = satisfaction_score)) +
geom_boxplot(fill = "red") +
labs(
title = "Satisfaction Levels Across Months (Simulated)",
x = "Month",
y = "Satisfaction Score"
) +
theme_minimal()