Taco Delivery Analysis

Author

Your Name

Published

June 11, 2025

Setup and Data Import

Show code
# Load required libraries
library(tidyverse)
library(lubridate)
library(ggplot2)
library(plotly)
library(DT)
library(corrplot)
library(scales)
library(viridis)

# Report-wide plot theme: theme_minimal() with a bold 14pt title,
# a 12pt subtitle, and 11pt axis titles.
report_theme <- theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title = element_text(size = 11)
  )
theme_set(report_theme)
Show code
# Import the raw order records from taco.csv without echoing the
# guessed column specification.
taco_data <- read_csv(
  file = "taco.csv",
  show_col_types = FALSE
)

# Print a compact column-by-column overview of the raw data
glimpse(taco_data)
Rows: 1,000
Columns: 13
$ Order_ID          <dbl> 770487, 671858, 688508, 944962, 476417, 678856, 1836…
$ Restaurant_Name   <chr> "El Taco Loco", "El Taco Loco", "Taco Haven", "Spicy…
$ Location          <chr> "New York", "San Antonio", "Austin", "Dallas", "San …
$ Order_Time        <chr> "1/8/2024 14:55", "23-11-2024 17:11", "21-11-2024 20…
$ Delivery_Time     <chr> "1/8/2024 15:36", "23-11-2024 17:25", "21-11-2024 21…
$ Delivery_Duration <dbl> 41, 14, 38, 45, 15, 83, 45, 31, 17, 73, 64, 29, 11, …
$ Taco_Size         <chr> "Regular", "Regular", "Large", "Regular", "Large", "…
$ Taco_Type         <chr> "Chicken Taco", "Beef Taco", "Pork Taco", "Chicken T…
$ Toppings_Count    <dbl> 5, 1, 2, 2, 0, 0, 1, 3, 2, 1, 1, 4, 2, 1, 1, 2, 5, 4…
$ Distance          <dbl> 3.01, 6.20, 20.33, 3.00, 24.34, 16.70, 9.57, 9.80, 1…
$ Price             <dbl> 9.25, 4.25, 7.00, 5.50, 4.50, 3.00, 5.75, 6.75, 5.50…
$ Tip               <dbl> 2.22, 3.01, 0.02, 1.90, 1.14, 2.32, 0.63, 2.97, 0.33…
$ Weekend_Order     <lgl> FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE…

Data Cleaning and Preprocessing

Show code
# Clean and preprocess the data

# Parse timestamps that arrive in two layouts: slash-separated
# ("1/8/2024 14:55", read month-first) and dash-separated
# ("23-11-2024 17:11", read day-first). Anything else falls back to
# as_datetime().
# case_when() evaluates every right-hand side on the whole vector, so each
# parser also sees the strings meant for the other one; quiet = TRUE keeps
# those expected misses from raising "failed to parse" warnings.
# NOTE(review): reading slash dates month-first produces orders dated after
# the report's publication date -- confirm they are not really day-first.
parse_mixed_datetime <- function(x) {
  case_when(
    str_detect(x, "/") ~ mdy_hm(x, quiet = TRUE),
    str_detect(x, "-") ~ dmy_hm(x, quiet = TRUE),
    TRUE ~ as_datetime(x)
  )
}

taco_clean <- taco_data %>%
  # Clean column names
  janitor::clean_names() %>%
  # Parse dates - handle multiple date formats
  mutate(
    order_time = parse_mixed_datetime(order_time),
    delivery_time = parse_mixed_datetime(delivery_time)
  ) %>%
  # Create additional time-based variables
  mutate(
    order_hour = hour(order_time),
    order_day = wday(order_time, label = TRUE),
    order_month = month(order_time, label = TRUE),
    delivery_speed = case_when(
      # Keep unknown durations unknown instead of labelling them "Slow"
      is.na(delivery_duration) ~ NA_character_,
      delivery_duration <= 20 ~ "Fast",
      delivery_duration <= 40 ~ "Medium",
      TRUE ~ "Slow"
    ),
    price_per_km = price / distance,
    tip_percentage = (tip / price) * 100,
    high_tipper = tip_percentage > 15
  ) %>%
  # Convert categorical variables to factors with proper ordering
  mutate(
    restaurant_name = as.factor(restaurant_name),
    # Derive the levels from the data (sorted alphabetically). A hard-coded
    # list of cities silently turns every unlisted city into NA, which then
    # shows up as an "NA" location in every grouped table and plot.
    location = factor(location),
    taco_size = factor(taco_size, levels = c("Regular", "Large")),
    taco_type = as.factor(taco_type),
    weekend_order = as.logical(weekend_order),
    delivery_speed = factor(delivery_speed, levels = c("Fast", "Medium", "Slow"))
  )

# Display cleaned data summary
summary(taco_clean)
    order_id              restaurant_name        location  
 Min.   :101139   Urban Tacos     :117    Austin     : 95  
 1st Qu.:331797   Grande Tacos    :106    Dallas     : 80  
 Median :559740   The Taco Stand  :105    Los Angeles:101  
 Mean   :552505   Casa del Taco   :104    New York   : 96  
 3rd Qu.:771782   Spicy Taco House:100    San Antonio:113  
 Max.   :999138   Taco Fiesta     : 99    NA's       :515  
                  (Other)         :369                     
   order_time                     delivery_time                   
 Min.   :2024-01-01 18:39:00.00   Min.   :2024-01-01 19:48:00.00  
 1st Qu.:2024-05-16 07:15:30.00   1st Qu.:2024-05-16 11:58:15.00  
 Median :2024-09-14 02:31:30.00   Median :2024-09-14 03:53:00.00  
 Mean   :2024-09-28 13:28:54.48   Mean   :2024-09-28 13:40:57.48  
 3rd Qu.:2025-01-27 05:15:30.00   3rd Qu.:2025-01-27 06:24:30.00  
 Max.   :2025-12-02 16:55:00.00   Max.   :2025-12-02 17:36:00.00  
                                                                  
 delivery_duration   taco_size          taco_type   toppings_count 
 Min.   :10.00     Regular:502   Beef Taco   :182   Min.   :0.000  
 1st Qu.:30.00     Large  :498   Chicken Taco:218   1st Qu.:1.000  
 Median :53.00                   Fish Taco   :211   Median :3.000  
 Mean   :50.93                   Pork Taco   :192   Mean   :2.529  
 3rd Qu.:71.00                   Veggie Taco :197   3rd Qu.:4.000  
 Max.   :90.00                                      Max.   :5.000  
                                                                   
    distance          price             tip         weekend_order  
 Min.   : 0.510   Min.   : 3.000   Min.   :0.0100   Mode :logical  
 1st Qu.: 6.973   1st Qu.: 4.500   1st Qu.:0.9075   FALSE:725      
 Median :13.200   Median : 6.750   Median :1.7600   TRUE :275      
 Mean   :13.073   Mean   : 6.908   Mean   :1.8061                  
 3rd Qu.:19.242   3rd Qu.: 9.250   3rd Qu.:2.5200                  
 Max.   :24.980   Max.   :10.750   Max.   :4.9800                  
                                                                   
   order_hour    order_day  order_month  delivery_speed  price_per_km    
 Min.   : 0.00   Sun:131   Mar    :119   Fast  :133     Min.   : 0.1201  
 1st Qu.: 5.00   Mon:174   Jan    :113   Medium:222     1st Qu.: 0.3403  
 Median :11.00   Tue:146   May    :103   Slow  :645     Median : 0.5302  
 Mean   :11.34   Wed:155   Apr    : 99                  Mean   : 1.0987  
 3rd Qu.:17.00   Thu:115   Feb    : 84                  3rd Qu.: 0.9960  
 Max.   :23.00   Fri:139   Jul    : 80                  Max.   :21.0784  
                 Sat:140   (Other):402                                   
 tip_percentage     high_tipper    
 Min.   :  0.1482   Mode :logical  
 1st Qu.: 12.7121   FALSE:300      
 Median : 25.5009   TRUE :700      
 Mean   : 29.4373                  
 3rd Qu.: 40.3522                  
 Max.   :149.6667                  
                                   
Show code
# Interactive view of the cleaned data: horizontal scrolling for the wide
# table, ten rows per page.
table_options <- list(scrollX = TRUE, pageLength = 10)

DT::datatable(
  taco_clean,
  options = table_options,
  caption = "Cleaned Taco Delivery Dataset"
)

Exploratory Data Analysis

Distribution of Key Variables

Show code
# Histograms of the four key numeric variables, arranged in a 2 x 2 grid.

# Shared histogram layer: 15 bins, semi-transparent, caller picks the fill.
histogram_layer <- function(fill_colour) {
  geom_histogram(bins = 15, fill = fill_colour, alpha = 0.7)
}

p1 <- ggplot(taco_clean, aes(x = delivery_duration)) +
  histogram_layer("steelblue") +
  labs(
    title = "Delivery Duration Distribution",
    x = "Minutes",
    y = "Count"
  )

p2 <- ggplot(taco_clean, aes(x = price)) +
  histogram_layer("forestgreen") +
  labs(
    title = "Price Distribution",
    x = "Price ($)",
    y = "Count"
  )

p3 <- ggplot(taco_clean, aes(x = tip_percentage)) +
  histogram_layer("orange") +
  labs(
    title = "Tip Percentage Distribution",
    x = "Tip %",
    y = "Count"
  )

p4 <- ggplot(taco_clean, aes(x = distance)) +
  histogram_layer("purple") +
  labs(
    title = "Distance Distribution",
    x = "Distance",
    y = "Count"
  )

# Lay the four panels out two per row
gridExtra::grid.arrange(p1, p2, p3, p4, ncol = 2)

Distribution of Key Numerical Variables

Restaurant Performance Analysis

Show code
# Per-restaurant order count and rounded averages, busiest restaurant first.
restaurant_stats <- taco_clean %>%
  group_by(restaurant_name) %>%
  summarise(
    total_orders = n(),
    avg_delivery_time = round(mean(delivery_duration), digits = 1),
    avg_price = round(mean(price), digits = 2),
    avg_tip_pct = round(mean(tip_percentage), digits = 1),
    avg_distance = round(mean(distance), digits = 2),
    .groups = "drop"
  ) %>%
  arrange(desc(total_orders))

# Human-readable headers, in the same order as the summary columns
restaurant_headers <- c(
  "Restaurant", "Orders", "Avg Delivery (min)",
  "Avg Price ($)", "Avg Tip (%)", "Avg Distance"
)

knitr::kable(
  restaurant_stats,
  caption = "Restaurant Performance Summary",
  col.names = restaurant_headers
)
Restaurant Performance Summary
Restaurant Orders Avg Delivery (min) Avg Price ($) Avg Tip (%) Avg Distance
Urban Tacos 117 51.6 6.79 28.0 13.21
Grande Tacos 106 51.5 6.86 32.8 13.75
The Taco Stand 105 53.6 6.88 29.9 12.71
Casa del Taco 104 50.3 6.81 30.1 12.45
Spicy Taco House 100 51.9 7.33 25.7 13.03
Taco Fiesta 99 49.9 7.19 27.2 12.94
La Vida Taco 98 47.1 6.80 30.2 13.12
Taco Haven 95 51.9 6.88 30.8 13.43
Taco Time Express 91 52.7 6.78 31.1 13.64
El Taco Loco 85 48.2 6.75 28.7 12.43

Restaurant Performance Metrics

Show code
# Horizontal bar chart of mean delivery time, restaurants ordered by that mean
restaurant_stats %>%
  ggplot(
    aes(
      x = reorder(restaurant_name, avg_delivery_time),
      y = avg_delivery_time
    )
  ) +
  geom_col(fill = "coral", alpha = 0.8) +
  coord_flip() +
  labs(
    title = "Average Delivery Time by Restaurant",
    x = "Restaurant",
    y = "Average Delivery Time (minutes)"
  )

Restaurant Performance Metrics

Location Analysis (Categorical)

Show code
# Per-location volumes, averages, and shares of weekend orders, fast
# deliveries, and high tippers, busiest location first.

# Share of `part` in `whole`, as a percentage rounded to one decimal
share_pct <- function(part, whole) {
  round((part / whole) * 100, 1)
}

location_stats <- taco_clean %>%
  group_by(location) %>%
  summarise(
    total_orders = n(),
    avg_delivery_time = round(mean(delivery_duration), 1),
    median_delivery_time = round(median(delivery_duration), 1),
    avg_price = round(mean(price), 2),
    avg_distance = round(mean(distance), 2),
    avg_tip_pct = round(mean(tip_percentage), 1),
    weekend_orders = sum(weekend_order),
    weekend_pct = share_pct(weekend_orders, total_orders),
    fast_deliveries = sum(delivery_speed == "Fast"),
    fast_delivery_pct = share_pct(fast_deliveries, total_orders),
    high_tippers = sum(high_tipper),
    high_tipper_pct = share_pct(high_tippers, total_orders),
    .groups = "drop"
  ) %>%
  arrange(desc(total_orders))

# Table headers, in the same order as the summary columns
location_headers <- c(
  "Location", "Orders", "Avg Delivery (min)", "Median Delivery (min)",
  "Avg Price ($)", "Avg Distance", "Avg Tip (%)",
  "Weekend Orders", "Weekend %", "Fast Deliveries",
  "Fast %", "High Tippers", "High Tip %"
)

knitr::kable(
  location_stats,
  caption = "Comprehensive Location Performance Analysis",
  col.names = location_headers
)
Comprehensive Location Performance Analysis
Location Orders Avg Delivery (min) Median Delivery (min) Avg Price ($) Avg Distance Avg Tip (%) Weekend Orders Weekend % Fast Deliveries Fast % High Tippers High Tip %
NA 515 51.3 54.0 6.86 13.09 30.8 142 27.6 68 13.2 367 71.3
San Antonio 113 49.8 50.0 6.80 12.45 30.0 31 27.4 17 15.0 76 67.3
Los Angeles 101 49.9 52.0 7.30 13.38 25.8 24 23.8 13 12.9 70 69.3
New York 96 49.8 52.0 7.04 12.34 27.3 28 29.2 16 16.7 63 65.6
Austin 95 49.0 51.0 6.74 13.84 27.9 31 32.6 11 11.6 65 68.4
Dallas 80 55.0 59.5 6.93 13.40 28.9 19 23.8 8 10.0 59 73.8

Enhanced Location Analysis with Categorical Treatment

Show code
# Four-panel location comparison: three per-order boxplots plus order volume.

# Theme tweaks shared by the boxplots: slanted x labels, no legend
slanted_x_no_legend <- theme(
  axis.text.x = element_text(angle = 45, hjust = 1),
  legend.position = "none"
)

# Boxplot of one per-order metric by location, filled by location
location_boxplot <- function(mapping, plot_title, y_label) {
  ggplot(taco_clean, mapping) +
    geom_boxplot(alpha = 0.7) +
    scale_fill_viridis_d() +
    labs(title = plot_title, x = "Location", y = y_label) +
    slanted_x_no_legend
}

p1_loc <- location_boxplot(
  aes(x = location, y = delivery_duration, fill = location),
  plot_title = "Delivery Time Distribution by Location",
  y_label = "Delivery Duration (minutes)"
)

p2_loc <- location_boxplot(
  aes(x = location, y = price, fill = location),
  plot_title = "Price Distribution by Location",
  y_label = "Price ($)"
)

p3_loc <- location_boxplot(
  aes(x = location, y = tip_percentage, fill = location),
  plot_title = "Tip Percentage by Location",
  y_label = "Tip Percentage (%)"
)

# Order volume per location as horizontal bars, sorted by volume
p4_loc <- location_stats %>%
  ggplot(
    aes(
      x = reorder(location, total_orders),
      y = total_orders,
      fill = location
    )
  ) +
  geom_col(alpha = 0.8) +
  scale_fill_viridis_d() +
  coord_flip() +
  labs(
    title = "Total Orders by Location",
    x = "Location",
    y = "Number of Orders"
  ) +
  theme(legend.position = "none")

# Lay the four panels out two per row
gridExtra::grid.arrange(p1_loc, p2_loc, p3_loc, p4_loc, ncol = 2)

Enhanced Location Analysis with Categorical Treatment
Show code
# Create location performance rankings
# Rank 1 is "best" on each metric: shortest average delivery time, and
# highest average price, tip percentage, order volume, and fast-delivery
# share.
location_rankings <- location_stats %>%
  # Orders without a known location are not a market to be ranked
  filter(!is.na(location)) %>%
  mutate(
    # min_rank() gives tied locations the same whole-number rank; rank()
    # averages ties and would produce labels such as "#2.5"
    delivery_rank = min_rank(avg_delivery_time),
    price_rank = min_rank(desc(avg_price)),
    tip_rank = min_rank(desc(avg_tip_pct)),
    volume_rank = min_rank(desc(total_orders)),
    fast_delivery_rank = min_rank(desc(fast_delivery_pct))
  ) %>%
  select(location, delivery_rank, price_rank, tip_rank, volume_rank, fast_delivery_rank)

# Reshape for heatmap
location_rankings_long <- location_rankings %>%
  pivot_longer(cols = -location, names_to = "metric", values_to = "rank") %>%
  mutate(
    metric = factor(metric,
                   levels = c("volume_rank", "delivery_rank", "fast_delivery_rank",
                             "price_rank", "tip_rank"),
                   labels = c("Order Volume", "Delivery Speed", "Fast Delivery %",
                             "Price Level", "Tip %"))
  )

# One legend break per possible rank, so the legend follows the number of
# locations instead of assuming exactly five
rank_breaks <- seq_len(nrow(location_rankings))

# Create performance heatmap
ggplot(location_rankings_long, aes(x = metric, y = location, fill = rank)) +
  geom_tile(color = "white") +
  scale_fill_viridis_c(name = "Rank", trans = "reverse",
                       breaks = rank_breaks,
                       labels = scales::label_ordinal()) +
  geom_text(aes(label = paste0("#", rank)), color = "white", fontface = "bold") +
  labs(title = "Location Performance Rankings",
       subtitle = "Lower numbers indicate better performance",
       x = "Performance Metric", y = "Location") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Location Performance Metrics and Rankings
Show code
# Statistical tests for location differences
# One-way ANOVA of delivery duration with location as the only factor
delivery_aov <- aov(
  formula = delivery_duration ~ location,
  data = taco_clean
)
cat("ANOVA Results for Delivery Duration by Location:\n")
ANOVA Results for Delivery Duration by Location:
Show code
# Print the ANOVA table (Df, sums of squares, F value, p-value)
aov_table <- summary(delivery_aov)
print(aov_table)
             Df Sum Sq Mean Sq F value Pr(>F)
location      4   2011   502.8   0.921  0.452
Residuals   480 262154   546.2               
515 observations deleted due to missingness
Show code
# Chi-square test on the location x weekend_order contingency table
weekend_table <- table(
  taco_clean[["location"]],
  taco_clean[["weekend_order"]]
)
weekend_chi <- chisq.test(weekend_table)
cat("\nChi-square Test for Weekend Orders by Location:\n")

Chi-square Test for Weekend Orders by Location:
Show code
# Print the chi-square statistic, degrees of freedom, and p-value
print(x = weekend_chi)

    Pearson's Chi-squared test

data:  weekend_table
X-squared = 2.6639, df = 4, p-value = 0.6155

Taco Preferences and Patterns

Show code
# Orders per taco type and size, with each combination's share of all orders
type_size_counts <- count(taco_clean, taco_type, taco_size, name = "orders")

taco_summary <- type_size_counts %>%
  mutate(percentage = round((orders / sum(orders)) * 100, 1))

# Side-by-side bars per taco type, one bar per size, labelled with the count
taco_summary %>%
  ggplot(aes(x = taco_type, y = orders, fill = taco_size)) +
  geom_col(position = "dodge", alpha = 0.8) +
  geom_text(
    aes(label = orders),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  ) +
  scale_fill_viridis_d() +
  labs(
    title = "Taco Orders by Type and Size",
    x = "Taco Type",
    y = "Number of Orders",
    fill = "Size"
  )

Taco Type and Size Preferences
Show code
# Number of orders for each toppings count (count treated as a category)
taco_clean %>%
  ggplot(aes(x = factor(toppings_count))) +
  geom_bar(fill = "skyblue", alpha = 0.8) +
  labs(
    title = "Distribution of Toppings Count",
    x = "Number of Toppings",
    y = "Number of Orders"
  )

Taco Type and Size Preferences

Time-Based Analysis

Show code
# Orders per hour of day, each hour tagged with a coarse time period:
# before 12 is "Morning", 12 to 16 is "Afternoon", everything else "Evening".
orders_per_hour <- count(taco_clean, order_hour)

hourly_orders <- orders_per_hour %>%
  mutate(
    time_period = case_when(
      order_hour < 12 ~ "Morning",
      order_hour < 17 ~ "Afternoon",
      TRUE ~ "Evening"
    )
  )

hourly_orders %>%
  ggplot(aes(x = order_hour, y = n, fill = time_period)) +
  geom_col(alpha = 0.8) +
  scale_fill_brewer(type = "qual", palette = "Set2") +
  labs(
    title = "Orders by Hour of Day",
    x = "Hour",
    y = "Number of Orders",
    fill = "Time Period"
  )

Temporal Patterns in Orders
Show code
# Mean price, tip percentage, and delivery duration for weekend vs weekday
# orders, reshaped to one row per (day type, metric).
weekend_means <- taco_clean %>%
  group_by(weekend_order) %>%
  summarise(
    avg_price = mean(price),
    avg_tip = mean(tip_percentage),
    avg_delivery = mean(delivery_duration),
    .groups = "drop"
  )

weekend_comparison <- weekend_means %>%
  pivot_longer(
    cols = -weekend_order,
    names_to = "metric",
    values_to = "value"
  ) %>%
  mutate(day_type = ifelse(weekend_order, "Weekend", "Weekday"))

# One facet per metric with its own y scale, since the metrics differ in unit
day_type_colours <- c("Weekday" = "lightblue", "Weekend" = "salmon")

weekend_comparison %>%
  ggplot(aes(x = metric, y = value, fill = day_type)) +
  geom_col(position = "dodge", alpha = 0.8) +
  facet_wrap(~metric, scales = "free_y") +
  scale_fill_manual(values = day_type_colours) +
  labs(
    title = "Weekend vs Weekday Comparison",
    x = "Metric",
    y = "Average Value",
    fill = "Day Type"
  ) +
  theme(axis.text.x = element_blank())

Temporal Patterns in Orders

Statistical Analysis and Correlations

Show code
# Pairwise correlations between the numeric order variables, computed on
# rows with no missing values in any of them.
correlation_inputs <- select(
  taco_clean,
  delivery_duration, distance, price, tip, toppings_count,
  tip_percentage, price_per_km
)
numeric_vars <- cor(correlation_inputs, use = "complete.obs")

# Upper-triangle colour matrix with the coefficients printed in each cell
corrplot(
  numeric_vars,
  method = "color",
  type = "upper",
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45,
  title = "Correlation Matrix of Key Variables",
  mar = c(0, 0, 2, 0)
)

Correlation Matrix of Numerical Variables

Delivery Performance Analysis

Show code
# Candidate drivers of delivery time, plus a flag for orders placed in the
# hours listed in peak_hours.
peak_hours <- c(12, 13, 18, 19, 20)

delivery_analysis <- taco_clean %>%
  select(
    delivery_duration, distance, taco_size, weekend_order,
    order_hour, toppings_count, restaurant_name
  ) %>%
  mutate(peak_hour = order_hour %in% peak_hours)

# Scatter of duration against distance with a dashed linear fit
taco_clean %>%
  ggplot(aes(x = distance, y = delivery_duration)) +
  geom_point(aes(color = delivery_speed), alpha = 0.7, size = 2) +
  geom_smooth(
    method = "lm",
    se = TRUE,
    color = "black",
    linetype = "dashed"
  ) +
  scale_color_viridis_d() +
  labs(
    title = "Delivery Time vs Distance",
    subtitle = "Colored by delivery speed category",
    x = "Distance",
    y = "Delivery Duration (minutes)",
    color = "Speed"
  )

Factors Affecting Delivery Performance
Show code
# Delivery duration split by taco size and by weekend flag, side by side
hide_legend <- theme(legend.position = "none")

p1 <- taco_clean %>%
  ggplot(aes(x = taco_size, y = delivery_duration, fill = taco_size)) +
  geom_boxplot(alpha = 0.7) +
  labs(
    title = "Delivery Time by Taco Size",
    x = "Size",
    y = "Minutes"
  ) +
  hide_legend

p2 <- taco_clean %>%
  ggplot(
    aes(x = weekend_order, y = delivery_duration, fill = weekend_order)
  ) +
  geom_boxplot(alpha = 0.7) +
  labs(
    title = "Delivery Time: Weekend vs Weekday",
    x = "Weekend Order",
    y = "Minutes"
  ) +
  hide_legend

gridExtra::grid.arrange(p1, p2, ncol = 2)

Factors Affecting Delivery Performance

Location-Based Revenue Analysis

Show code
# Revenue analysis by location
# Totals and per-order averages of price and tip for each location, plus
# tips as a percentage of revenue, sorted by total revenue.
location_revenue <- taco_clean %>%
  group_by(location) %>%
  summarise(
    total_revenue = sum(price),
    total_tips = sum(tip),
    avg_revenue_per_order = round(mean(price), 2),
    avg_tip_per_order = round(mean(tip), 2),
    tip_efficiency = round(total_tips / total_revenue * 100, 2),
    .groups = "drop"
  ) %>%
  # Rank after summarise(): inside the grouped summarise() each location is
  # compared only with itself, so every location would get rank 1.
  # .after keeps the column order expected by the table headers below.
  mutate(
    revenue_per_order_rank = min_rank(desc(avg_revenue_per_order)),
    .after = avg_tip_per_order
  ) %>%
  arrange(desc(total_revenue))

# Display location revenue summary
# Dollar signs are escaped: with several "$" in one header row, Pandoc pairs
# them up as inline-math delimiters and garbles the rendered header.
knitr::kable(location_revenue,
             caption = "Revenue Analysis by Location",
             col.names = c("Location", "Total Revenue (\\$)", "Total Tips (\\$)",
                          "Avg Revenue/Order (\\$)", "Avg Tip/Order (\\$)",
                          "Revenue Rank", "Tip Efficiency (%)"))
Revenue Analysis by Location
Location Total Revenue ($) Total Tips ($) Avg Revenue/Order ($) Avg Tip/Order ($) Revenue Rank Tip Efficiency (%)
NA 3531.00 962.77 6.86 1.87 1 27.27
San Antonio 768.25 197.07 6.80 1.74 1 25.65
Los Angeles 737.50 173.81 7.30 1.72 1 23.57
New York 676.25 169.06 7.04 1.76 1 25.00
Austin 640.50 157.53 6.74 1.66 1 24.59
Dallas 554.75 145.87 6.93 1.82 1 26.29

Revenue Analysis by Location

Show code
# Horizontal bar charts of total revenue and tip efficiency by location

# Bars for one location_revenue metric, filled by location, legend hidden
location_revenue_bars <- function(mapping, plot_title, y_label) {
  ggplot(location_revenue, mapping) +
    geom_col(alpha = 0.8) +
    scale_fill_viridis_d() +
    coord_flip() +
    labs(title = plot_title, x = "Location", y = y_label) +
    theme(legend.position = "none")
}

p1_rev <- location_revenue_bars(
  aes(
    x = reorder(location, total_revenue),
    y = total_revenue,
    fill = location
  ),
  plot_title = "Total Revenue by Location",
  y_label = "Total Revenue ($)"
)

p2_rev <- location_revenue_bars(
  aes(
    x = reorder(location, tip_efficiency),
    y = tip_efficiency,
    fill = location
  ),
  plot_title = "Tip Efficiency by Location",
  y_label = "Tips as % of Revenue"
)

gridExtra::grid.arrange(p1_rev, p2_rev, ncol = 2)

Revenue Analysis by Location

Location-Specific Operational Analysis

Show code
# Per-location operating profile: mean toppings, most frequent taco type,
# size and order hour, and mean price per unit of distance.

# Name of the most frequent value in x (first one after sorting by count)
most_frequent <- function(x) {
  frequencies <- sort(table(x), decreasing = TRUE)
  names(frequencies)[1]
}

location_operations <- taco_clean %>%
  group_by(location) %>%
  summarise(
    avg_toppings = round(mean(toppings_count), 1),
    most_common_taco = most_frequent(taco_type),
    most_common_size = most_frequent(taco_size),
    peak_hour = most_frequent(order_hour),
    avg_distance_efficiency = round(mean(price_per_km), 2),
    .groups = "drop"
  )

# Table headers, in the same order as the summary columns
operations_headers <- c(
  "Location", "Avg Toppings", "Most Popular Taco",
  "Most Popular Size", "Peak Hour", "Price/KM ($)"
)

knitr::kable(
  location_operations,
  caption = "Operational Patterns by Location",
  col.names = operations_headers
)
Operational Patterns by Location
Location Avg Toppings Most Popular Taco Most Popular Size Peak Hour Price/KM ($)
Austin 2.5 Pork Taco Regular 0 0.98
Dallas 2.6 Fish Taco Regular 2 1.02
Los Angeles 2.7 Beef Taco Large 10 1.14
New York 2.6 Fish Taco Regular 2 1.57
San Antonio 2.4 Chicken Taco Large 15 1.25
NA 2.5 Chicken Taco Large 5 1.00

Operational Patterns by Location

Show code
# Orders per location and taco type, with each type's share within its
# location.
type_counts_by_location <- count(taco_clean, location, taco_type)

taco_location_cross <- type_counts_by_location %>%
  group_by(location) %>%
  mutate(percentage = round(n / sum(n) * 100, 1)) %>%
  ungroup()

# Stacked bars rescaled to a common height so the mix is comparable
taco_location_cross %>%
  ggplot(aes(x = location, y = percentage, fill = taco_type)) +
  geom_col(position = "fill", alpha = 0.8) +
  scale_fill_viridis_d() +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Taco Type Distribution by Location",
    x = "Location",
    y = "Percentage of Orders",
    fill = "Taco Type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Operational Patterns by Location

Revenue and Tipping Analysis

Show code
# Overall revenue and tipping figures across all orders
revenue_stats <- summarise(
  taco_clean,
  total_revenue = sum(price),
  total_tips = sum(tip),
  avg_order_value = mean(price),
  avg_tip_amount = mean(tip),
  avg_tip_percentage = mean(tip_percentage),
  high_tippers = sum(high_tipper),
  high_tipper_rate = mean(high_tipper) * 100
)

# Format a number as dollars rounded to cents, e.g. "$6.91"
as_dollars <- function(amount) {
  paste0("$", round(amount, 2))
}

# Format a number as a percentage rounded to one decimal, e.g. "29.4%"
as_percent <- function(share) {
  paste0(round(share, 1), "%")
}

# Two-column metric/value table for display
revenue_summary <- data.frame(
  Metric = c(
    "Total Revenue", "Total Tips", "Average Order Value",
    "Average Tip Amount", "Average Tip %", "High Tipper Rate %"
  ),
  Value = c(
    as_dollars(revenue_stats$total_revenue),
    as_dollars(revenue_stats$total_tips),
    as_dollars(revenue_stats$avg_order_value),
    as_dollars(revenue_stats$avg_tip_amount),
    as_percent(revenue_stats$avg_tip_percentage),
    as_percent(revenue_stats$high_tipper_rate)
  )
)

knitr::kable(revenue_summary, caption = "Revenue and Tipping Summary")
Revenue and Tipping Summary
Metric Value
Total Revenue $6908.25
Total Tips $1806.11
Average Order Value $6.91
Average Tip Amount $1.81
Average Tip % 29.4%
High Tipper Rate % 70%

Revenue and Tipping Patterns

Show code
# Tip percentage against order price (one linear fit per location), stacked
# above the tip percentage distribution per location.
p1_tip <- taco_clean %>%
  ggplot(aes(x = price, y = tip_percentage, color = location)) +
  geom_point(alpha = 0.7, size = 2) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_color_viridis_d() +
  labs(
    title = "Tip Percentage vs Order Price by Location",
    x = "Order Price ($)",
    y = "Tip Percentage (%)",
    color = "Location"
  )

# Violin for the distribution shape, with a narrow white boxplot inside
p2_tip <- taco_clean %>%
  ggplot(aes(x = location, y = tip_percentage, fill = location)) +
  geom_violin(alpha = 0.7) +
  geom_boxplot(width = 0.1, fill = "white", alpha = 0.8) +
  scale_fill_viridis_d() +
  labs(
    title = "Tip Distribution by Location",
    x = "Location",
    y = "Tip Percentage (%)"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

gridExtra::grid.arrange(p1_tip, p2_tip, ncol = 1)

Revenue and Tipping Patterns

Key Insights and Recommendations

Summary of Key Findings:

Order Volume & Performance: - Total orders analyzed: 1000 - Average delivery time: 50.9 minutes - Most popular taco type: Chicken Taco - Operating locations: 5 labelled cities (the remaining 515 orders have no matched location and are reported as NA)

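Location-Based Insights: - Busiest location: NA, i.e. the 515 orders with no matched location; among labelled cities, San Antonio (113 orders) - Fastest delivery location: Austin - Highest revenue location: NA (unmatched orders); among labelled cities, San Antonio ($768.25) - Best tipping location: NA (unmatched orders); among labelled cities, San Antonio by average tip percentage (30.0%)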

Operational Patterns: - Peak ordering hour: 4:00 - Location averages for delivery time, pricing, and tipping differ only modestly - The statistical tests in this report do not show that location affects delivery times or weekend ordering

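Statistical Findings: - ANOVA results show no statistically significant difference in delivery times across locations (F = 0.921, p = 0.452) - The chi-square test shows no significant association between location and weekend ordering (p = 0.616) - The most popular taco type and size vary by location, although this was not formally tested - Tips as a share of revenue range from 23.6% to 27.3% across locations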

Location-Specific Recommendations:

  1. Austin: Model for delivery efficiency - study and replicate processes
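  2. NA (busiest): Focus on capacity optimization and scaling operations. NA is the group of 515 orders with no matched location, not a city; San Antonio is the busiest labelled location.
  3. NA (highest revenue): Analyze pricing strategies for potential expansion to other markets. Among labelled locations, San Antonio has the highest total revenue.
  4. NA (best tipping): Study customer service practices that drive higher tips. Among labelled locations, San Antonio has the highest average tip percentage.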
  5. Market Expansion: Consider standardizing best practices across all locations

Categorical Analysis Benefits:

  • Market Segmentation: Each location represents a distinct market with unique characteristics
  • Targeted Strategies: Different locations require different operational approaches
  • Performance Benchmarking: Clear ranking system for location comparison
  • Resource Allocation: Data-driven decisions for staffing and inventory by location

This analysis provides insights into taco delivery operations and can be extended with additional data for more comprehensive business intelligence.