Taco Delivery Analysis: Distance, Tip, and Location

Load Data

# Read the raw order records from the CSV (path is relative to the
# current working directory)
taco <- read.csv(file = "taco.csv")

# Compact overview: column names, types, and the first few values
str(taco)
'data.frame':   1000 obs. of  13 variables:
 $ Order_ID         : int  770487 671858 688508 944962 476417 678856 183667 379946 771088 694731 ...
 $ Restaurant_Name  : chr  "El Taco Loco" "El Taco Loco" "Taco Haven" "Spicy Taco House" ...
 $ Location         : chr  "New York" "San Antonio" "Austin" "Dallas" ...
 $ Order_Time       : chr  "1/8/2024 14:55" "23-11-2024 17:11" "21-11-2024 20:24" "21-09-2024 06:43" ...
 $ Delivery_Time    : chr  "1/8/2024 15:36" "23-11-2024 17:25" "21-11-2024 21:02" "21-09-2024 07:28" ...
 $ Delivery_Duration: int  41 14 38 45 15 83 45 31 17 73 ...
 $ Taco_Size        : chr  "Regular" "Regular" "Large" "Regular" ...
 $ Taco_Type        : chr  "Chicken Taco" "Beef Taco" "Pork Taco" "Chicken Taco" ...
 $ Toppings_Count   : int  5 1 2 2 0 0 1 3 2 1 ...
 $ Distance         : num  3.01 6.2 20.33 3 24.34 ...
 $ Price            : num  9.25 4.25 7 5.5 4.5 3 5.75 6.75 5.5 5.75 ...
 $ Tip              : num  2.22 3.01 0.02 1.9 1.14 2.32 0.63 2.97 0.33 1.23 ...
 $ Weekend_Order    : logi  FALSE TRUE FALSE TRUE FALSE FALSE ...

Clean and Prepare Data

# Rename columns by name rather than by position, so a change in the CSV's
# column order cannot silently relabel the wrong columns. any_of() skips
# names that are absent, which keeps this step harmless if it is re-run
# after the columns have already been renamed.
taco <- taco %>%
  rename(any_of(c(Vendor = "Restaurant_Name", City = "Location")))

# Keep only the fields used in the analysis and drop rows where either
# measure is missing, so later statistics need no extra NA handling.
taco_clean <- taco %>%
  select(City, Distance, Tip) %>%
  filter(!is.na(Distance), !is.na(Tip))

Distance vs Tip (Correlation)

# Correlation coefficient between delivery distance and tip
# (cor() defaults to Pearson)
with(taco_clean, cor(Distance, Tip))
[1] 0.007936478
# Tip plotted against distance: semi-transparent points with a fitted
# linear trend line and its confidence band on top
ggplot(data = taco_clean, mapping = aes(x = Distance, y = Tip)) +
  geom_point(color = "darkgreen", alpha = 0.6) +
  geom_smooth(method = "lm", color = "blue", se = TRUE) +
  labs(
    x = "Distance (miles)",
    y = "Tip ($)",
    title = "Tip vs Distance"
  )
`geom_smooth()` using formula = 'y ~ x'

Tips by Location

# Mean tip per city, drawn as horizontal bars.
# Bar order comes from reorder() in the aesthetic mapping, so the summary
# table itself needs no sorting and carries only the column that is plotted.
# NOTE(review): with coord_flip(), reorder(City, -Average_Tip) places the
# city with the highest average at the bottom of the chart; drop the minus
# sign if the top-tipping city should appear first — confirm intent.
taco_clean %>%
  group_by(City) %>%
  summarise(Average_Tip = mean(Tip, na.rm = TRUE)) %>%
  ggplot(aes(x = reorder(City, -Average_Tip), y = Average_Tip, fill = City)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(title = "Average Tip by City",
       x = "City",
       y = "Average Tip ($)")