This analysis explores survey data collected from restaurant servers for MKT 4000. The survey examines whether drawing a smiley face on the bill affects tip outcomes, along with demographic and workplace context variables.
# Run this once in the Console if you haven't already:
# install.packages(c("tidyverse", "scales"))
library(tidyverse)
library(scales)
Qualtrics exports three header rows: short column names, full question text, and internal import IDs. We use row 1 as column names and drop rows 2 and 3 before analyzing.
# Load the Qualtrics export. The first CSV row becomes the column names, so
# the remaining two Qualtrics header rows arrive as the first two data rows.
raw <- read_csv(
  file = "BensusenMKGT4000_April 8, 2026_17.22.csv",
  show_col_types = FALSE
)

# Discard those two leftover header rows so only survey responses remain.
df_raw <- slice(raw, -(1:2))
# Build the analysis table: keep consenting respondents only, give the
# eight survey questions descriptive names, and turn every answer into a
# numeric code.
df <- df_raw %>%
  # Consent is compared as the string "1" because this step runs before
  # the numeric conversion below.
  filter(Q2 == "1") %>%
  select(
    consent       = Q2,
    gender        = Q3,
    experience    = Q4,
    satisfaction  = Q5,
    tip_share     = Q6,
    establishment = Q7,
    draws_smiley  = Q8,
    tip_impact    = Q9_1
  ) %>%
  mutate(across(everything(), as.numeric))

# Report how many consenting responses are available for analysis.
cat("Total consented responses:", nrow(df), "\n")
## Total consented responses: 30
# Translate the numeric answer codes into readable labels.
# Nominal items (gender, establishment, tip_share, draws_smiley) become
# character columns; ordered items (experience, satisfaction, tip_impact)
# become factors whose level order fixes the order used in tables and plots.
df_labeled <- df %>%
  mutate(
    # NOTE(review): there is no label for gender code 3 -- confirm against
    # the Qualtrics codebook that this code is unused.
    gender = recode(
      gender,
      `1` = "Male",
      `2` = "Female",
      `4` = "Non-binary / Other"
    ),
    establishment = recode(
      establishment,
      `1` = "Chain restaurant",
      `2` = "Locally owned"
    ),
    # Both yes/no questions share the same coding.
    across(
      c(tip_share, draws_smiley),
      ~ recode(.x, `1` = "Yes", `2` = "No")
    ),
    experience = factor(
      recode(
        experience,
        `1` = "Less than 1 year",
        `2` = "1-3 years",
        `3` = "3-5 years",
        `4` = "5+ years"
      ),
      levels = c("Less than 1 year", "1-3 years", "3-5 years", "5+ years")
    ),
    # NOTE(review): confirm the scale direction (1 = "Very dissatisfied")
    # against the survey instrument.
    satisfaction = factor(
      recode(
        satisfaction,
        `1` = "Very dissatisfied",
        `2` = "Dissatisfied",
        `3` = "Satisfied",
        `4` = "Very satisfied"
      ),
      levels = c(
        "Very dissatisfied", "Dissatisfied", "Satisfied", "Very satisfied"
      )
    ),
    tip_impact = factor(
      recode(
        tip_impact,
        `1` = "Tips increased",
        `2` = "No change",
        `3` = "Tips decreased"
      ),
      levels = c("Tips increased", "No change", "Tips decreased")
    )
  )
# Frequency table of respondent gender (missing answers are not counted).
cat("--- Gender ---\n")
print(table(df_labeled[["gender"]]))
## --- Gender ---
##
## Female Male Non-binary / Other
## 4 21 1
# Frequency table of years of serving experience, in factor-level order.
cat("\n--- Experience ---\n")
print(table(df_labeled[["experience"]]))
##
## --- Experience ---
##
## Less than 1 year 1-3 years 3-5 years 5+ years
## 6 8 4 7
# Frequency table of job satisfaction, in factor-level order.
cat("\n--- Satisfaction ---\n")
print(table(df_labeled[["satisfaction"]]))
##
## --- Satisfaction ---
##
## Very dissatisfied Dissatisfied Satisfied Very satisfied
## 15 8 2 1
# Frequency table of the tip-sharing question (Yes/No).
cat("\n--- Tip share? ---\n")
print(table(df_labeled[["tip_share"]]))
##
## --- Tip share? ---
##
## No Yes
## 21 5
# Frequency table of establishment type (chain vs. locally owned).
cat("\n--- Establishment ---\n")
print(table(df_labeled[["establishment"]]))
##
## --- Establishment ---
##
## Chain restaurant Locally owned
## 8 18
# Frequency table of whether the server draws a smiley on the bill.
cat("\n--- Draws smiley? ---\n")
print(table(df_labeled[["draws_smiley"]]))
##
## --- Draws smiley? ---
##
## No Yes
## 12 14
# Frequency table of the reported tip impact, in factor-level order.
cat("\n--- Tip impact ---\n")
print(table(df_labeled[["tip_impact"]]))
##
## --- Tip impact ---
##
## Tips increased No change Tips decreased
## 12 10 3
# Horizontal bar chart of respondent gender, each bar labelled with its
# count and its share of the respondents who answered the question.
df_labeled %>%
  # Exclude unanswered items, as the cross-tab charts later in this file do.
  # Otherwise count() adds an "NA" bar and the shares are diluted by
  # non-respondents.
  filter(!is.na(gender)) %>%
  count(gender) %>%
  mutate(pct = n / sum(n)) %>%
  # reorder() sorts the bars by frequency.
  ggplot(aes(x = reorder(gender, n), y = pct, fill = gender)) +
  geom_col(width = 0.6) +
  geom_text(
    aes(label = paste0(n, " (", percent(pct, accuracy = 1), ")")),
    hjust = -0.1, size = 3.8
  ) +
  # Full 0-100% axis: bars beyond a continuous scale's limits are censored,
  # and the labels drawn past the bar ends need headroom.
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  scale_fill_manual(
    values = c(
      "Male" = "#4A90D9",
      "Female" = "#E88B6A",
      "Non-binary / Other" = "#8BB56A"
    )
  ) +
  coord_flip() +
  labs(title = "Gender Identity of Respondents", x = NULL, y = "Percentage") +
  survey_theme
# Vertical bar chart of serving experience, each bar labelled with its
# count and its share of the respondents who answered the question.
df_labeled %>%
  # Exclude unanswered items, as the cross-tab charts later in this file do.
  # Otherwise count() adds an "NA" bar and the shares are diluted by
  # non-respondents.
  filter(!is.na(experience)) %>%
  count(experience) %>%
  mutate(pct = n / sum(n)) %>%
  ggplot(aes(x = experience, y = pct, fill = experience)) +
  geom_col(width = 0.6) +
  geom_text(
    aes(label = paste0(n, " (", percent(pct, accuracy = 1), ")")),
    vjust = -0.4, size = 3.8
  ) +
  # Bars above the upper limit are censored by the scale; raise it if any
  # category exceeds 60%.
  scale_y_continuous(labels = percent_format(), limits = c(0, 0.60)) +
  # Colours are keyed by level name so the light-to-dark ramp stays attached
  # to the right category even if a level has no responses.
  scale_fill_manual(
    values = c(
      "Less than 1 year" = "#D4E6F5",
      "1-3 years" = "#7BB8E0",
      "3-5 years" = "#3A88C5",
      "5+ years" = "#1A5C8A"
    )
  ) +
  labs(
    title = "Server Experience in the Industry",
    x = NULL,
    y = "Percentage"
  ) +
  survey_theme
# Vertical bar chart of job satisfaction, each bar labelled with its count
# and its share of the respondents who answered the question.
df_labeled %>%
  # Exclude unanswered items, as the cross-tab charts later in this file do.
  # Otherwise count() adds an "NA" bar and the shares are diluted by
  # non-respondents.
  filter(!is.na(satisfaction)) %>%
  count(satisfaction) %>%
  mutate(pct = n / sum(n)) %>%
  ggplot(aes(x = satisfaction, y = pct, fill = satisfaction)) +
  geom_col(width = 0.6) +
  geom_text(
    aes(label = paste0(n, " (", percent(pct, accuracy = 1), ")")),
    vjust = -0.4, size = 3.8
  ) +
  # Bars above the upper limit are censored by the scale; raise it if any
  # category exceeds 65%.
  scale_y_continuous(labels = percent_format(), limits = c(0, 0.65)) +
  # Colours are keyed by level name so the red-to-green ramp stays attached
  # to the right category even if a level has no responses.
  scale_fill_manual(
    values = c(
      "Very dissatisfied" = "#D9534F",
      "Dissatisfied" = "#F0A05A",
      "Satisfied" = "#5BA85A",
      "Very satisfied" = "#2E7D32"
    )
  ) +
  labs(title = "Job Satisfaction Among Servers", x = NULL, y = "Percentage") +
  survey_theme
This is the core research question — comparing servers who draw a smiley face on the bill versus those who do not, and the tip outcomes they report.
# Core comparison: within each smiley group (Yes / No), the share of
# servers reporting each tip outcome, drawn as side-by-side bars.
df_labeled %>%
  # Only respondents who answered both questions enter the comparison.
  drop_na(draws_smiley, tip_impact) %>%
  count(draws_smiley, tip_impact) %>%
  # Shares are computed within each smiley group, so each group sums to 100%.
  group_by(draws_smiley) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x = tip_impact, y = pct, fill = draws_smiley)) +
  geom_col(position = "dodge", width = 0.65) +
  geom_text(
    aes(label = percent(pct, accuracy = 1)),
    # Same dodge width as the bars so each label sits over its own bar.
    position = position_dodge(width = 0.65),
    vjust = -0.4,
    size = 3.5
  ) +
  scale_fill_manual(
    name = "Draws smiley face?",
    values = c("Yes" = "#E88B6A", "No" = "#4A90D9")
  ) +
  # Upper limit slightly above 100% leaves room for the labels.
  scale_y_continuous(labels = percent_format(), limits = c(0, 1.05)) +
  labs(
    title = "Tip Impact by Smiley Face Drawing Behavior",
    subtitle = "Among servers who draw vs. don't draw a smiley on the bill",
    x = "Reported tip impact",
    y = "Percentage within group"
  ) +
  survey_theme +
  theme(legend.position = "right")
# Stacked bars: for each establishment type, the share of servers who do
# and do not draw a smiley face on the bill.
df_labeled %>%
  # Only respondents who answered both questions are included.
  drop_na(establishment, draws_smiley) %>%
  count(establishment, draws_smiley) %>%
  # Shares are computed within each establishment type.
  group_by(establishment) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x = establishment, y = pct, fill = draws_smiley)) +
  geom_col(width = 0.55) +
  geom_text(
    aes(label = percent(pct, accuracy = 1)),
    # vjust = 0.5 centres each label inside its stacked segment.
    position = position_stack(vjust = 0.5),
    size = 3.8,
    color = "white"
  ) +
  scale_fill_manual(
    name = "Draws smiley?",
    values = c("Yes" = "#E88B6A", "No" = "#7BB8E0")
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(
    title = "Smiley Face Drawing by Establishment Type",
    x = NULL,
    y = "Percentage"
  ) +
  survey_theme +
  theme(legend.position = "right")
# Stacked bars: for each establishment type, the share of servers who
# answered Yes vs. No to the tip-sharing question.
df_labeled %>%
  # Only respondents who answered both questions are included.
  drop_na(establishment, tip_share) %>%
  count(establishment, tip_share) %>%
  # Shares are computed within each establishment type.
  group_by(establishment) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x = establishment, y = pct, fill = tip_share)) +
  geom_col(width = 0.55) +
  geom_text(
    aes(label = percent(pct, accuracy = 1)),
    # vjust = 0.5 centres each label inside its stacked segment.
    position = position_stack(vjust = 0.5),
    size = 3.8,
    color = "white"
  ) +
  scale_fill_manual(
    name = "Tip share?",
    values = c("Yes" = "#5BA85A", "No" = "#D9534F")
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(
    title = "Tip Sharing by Establishment Type",
    x = NULL,
    y = "Percentage"
  ) +
  survey_theme +
  theme(legend.position = "right")
# Cross-tabulation of job satisfaction (rows) against reported tip impact
# (columns), using respondents who answered both questions.
df_labeled %>%
  drop_na(satisfaction, tip_impact) %>%
  count(satisfaction, tip_impact) %>%
  pivot_wider(
    names_from = tip_impact,
    values_from = n,
    # Combinations nobody reported appear as 0 rather than NA.
    values_fill = 0
  )
## # A tibble: 4 × 4
## satisfaction `Tips increased` `No change` `Tips decreased`
## <fct> <int> <int> <int>
## 1 Very dissatisfied 5 7 2
## 2 Dissatisfied 5 3 0
## 3 Satisfied 2 0 0
## 4 Very satisfied 0 0 1
The sample for this survey is small: n = 30 consented responses, of which only 25–26 answered each individual question. These results should therefore not be generalized to the broader population of servers. As a rule of thumb, a minimum of 30 responses per subgroup is typically recommended for most statistical analyses.
Wickham, H. (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag. https://ggplot2.tidyverse.org
Wickham, H., & Grolemund, G. (2017). R for Data Science. O’Reilly. https://r4ds.had.co.nz/
R Markdown. https://rmarkdown.rstudio.com/