This project analyzes Foodpanda order data using R, divided into key sections: basic understanding, aggregation, filtering, trends, business insights, pie chart analysis, box plot analysis, linear regression, and correlation analysis. Using dplyr, ggplot2, and reshape2, the data is processed, analyzed, and visualized to understand order patterns, customer ratings, revenue distribution, and relationships between variables.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(reshape2)
# Load the raw order data from the working directory.
df <- read.csv(file = "Foodpanda Analysis Dataset.csv")
# Derive the monetary value of each order line: unit price times quantity.
df <- mutate(df, order_value = price * quantity)
# Row count of the loaded data.
nrow(df)
## [1] 6000
# Counts total number of rows → total orders
Conclusion: Shows the overall scale of the dataset and business activity.
# Number of distinct restaurant names in the order data.
summarise(df, unique_restaurants = n_distinct(restaurant_name))
## unique_restaurants
## 1 5
# Counts distinct restaurants
Conclusion: Indicates platform diversity and competition level.
# Row counts per category, most frequent first; keep the first five rows.
head(count(df, category, sort = TRUE), n = 5)
## category n
## 1 Italian 1236
## 2 Fast Food 1222
## 3 Continental 1211
## 4 Chinese 1198
## 5 Dessert 1133
# Counts frequency of each category
Conclusion: Reveals customer food preferences and demand trends.
# Mean rating across all rows, ignoring missing ratings.
summarise(df, avg_rating = mean(rating, na.rm = TRUE))
## avg_rating
## 1 2.996833
# Calculates mean rating
Conclusion: Reflects overall customer satisfaction on the platform.
# Row count per city, sorted from most to fewest.
city_orders <- count(df, city, sort = TRUE)
# Horizontal bar chart; cities are ordered by their count.
ggplot(city_orders, aes(x = reorder(city, n), y = n)) +
  geom_col() +
  coord_flip() +
  theme_minimal()
# Orders grouped by city
Conclusion: Identifies high-demand cities driving platform usage.
# Mean rating per restaurant (missing ratings ignored).
rating_rest <- summarise(
  group_by(df, restaurant_name),
  avg_rating = mean(rating, na.rm = TRUE)
)
# Keep the ten restaurants with the highest mean rating.
rating_rest <- head(arrange(rating_rest, desc(avg_rating)), n = 10)
# Horizontal bar chart; restaurants are ordered by mean rating.
ggplot(
  rating_rest,
  aes(x = reorder(restaurant_name, avg_rating), y = avg_rating)
) +
  geom_col() +
  coord_flip() +
  theme_minimal()
# Average rating per restaurant
Conclusion: Highlights top-performing restaurants in terms of quality.
# Restaurant with the highest mean order value: rank restaurants by their
# average order_value (NAs ignored) and print only the first row.
head(
  df %>%
    group_by(restaurant_name) %>%
    summarise(avg_value = mean(order_value, na.rm = TRUE)) %>%
    arrange(desc(avg_value)),
  n = 1
)
## # A tibble: 1 × 2
## restaurant_name avg_value
## <chr> <dbl>
## 1 Pizza Hut 2453.
# Finds the restaurant with the highest average order value
Conclusion: Identifies premium restaurants with higher customer spending.
# Total order value per city (missing order values ignored).
city_revenue <- summarise(
  group_by(df, city),
  revenue = sum(order_value, na.rm = TRUE)
)
# Horizontal bar chart; cities are ordered by revenue.
ggplot(city_revenue, aes(x = reorder(city, revenue), y = revenue)) +
  geom_col() +
  coord_flip() +
  theme_minimal()
# Revenue aggregated by city
Conclusion: Shows which locations contribute most to total revenue.
# Total order value per food category (missing order values ignored).
cat_revenue <- summarise(
  group_by(df, category),
  revenue = sum(order_value, na.rm = TRUE)
)
# Horizontal bar chart; categories are ordered by revenue.
ggplot(cat_revenue, aes(x = reorder(category, revenue), y = revenue)) +
  geom_col() +
  coord_flip() +
  theme_minimal()
# Revenue by category
Conclusion: Reveals which cuisines are most profitable.
# Bucket each rated order into a tier and count the orders per tier.
# Rows with a missing rating are dropped first: an NA rating makes both
# comparisons NA, so case_when() would otherwise fall through to the
# catch-all branch and count unrated orders as "Poor".
rating_tiers <- df %>%
  filter(!is.na(rating)) %>%
  mutate(tier = case_when(
    rating >= 4.5 ~ "Excellent",
    rating >= 3.0 ~ "Average",
    TRUE ~ "Poor"
  )) %>%
  count(tier)
# Pie chart: a single stacked bar wrapped into polar coordinates on y.
ggplot(rating_tiers, aes(x = "", y = n, fill = tier)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y") +
  theme_void()
# Rating distribution
Conclusion: Shows proportion of good vs poor customer experiences.
# Total order value split by order size: quantity of exactly 1 is "Single",
# any other quantity is "Multi". The grouping column is created inline.
order_size_rev <- df %>%
  group_by(size_type = ifelse(quantity == 1, "Single", "Multi")) %>%
  summarise(revenue = sum(order_value, na.rm = TRUE))
# Pie chart: a single stacked bar wrapped into polar coordinates on y.
ggplot(order_size_rev, aes(x = "", y = revenue, fill = size_type)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  theme_void()
# Revenue split by order size
Conclusion: Indicates whether bulk orders contribute more revenue.
# 95th percentile of order value, used to cap the visible axis range so the
# largest values do not compress the boxes.
upper_limit <- quantile(df[["order_value"]], probs = 0.95, na.rm = TRUE)
# Order value distribution per category. The limit is set on the coordinate
# system rather than the scale, so it zooms without dropping observations.
ggplot(df, aes(x = category, y = order_value)) +
  geom_boxplot() +
  coord_flip(ylim = c(0, upper_limit)) +
  theme_minimal()
# Boxplot of order values
Conclusion: Highlights spending variability and outliers across cuisines.
# 95th percentile of item price, used to cap the visible axis range.
price_upper_limit <- quantile(df[["price"]], probs = 0.95, na.rm = TRUE)
# Price distribution per category, zoomed to [0, 95th percentile] via the
# coordinate system so no observations are removed from the statistics.
ggplot(df, aes(x = category, y = price)) +
  geom_boxplot() +
  coord_flip(ylim = c(0, price_upper_limit)) +
  theme_minimal()
# Price distribution
Conclusion: Differentiates budget vs premium cuisines.
# Per-restaurant mean item price, mean rating and number of orders.
rest_price_rating <- summarise(
  group_by(df, restaurant_name),
  avg_item_price = mean(price, na.rm = TRUE),
  avg_rating = mean(rating, na.rm = TRUE),
  total_orders = n()
)
# Keep only restaurants with more than five orders.
rest_price_rating <- filter(rest_price_rating, total_orders > 5)
# Scatter of mean price against mean rating with a fitted linear trend.
ggplot(rest_price_rating, aes(x = avg_item_price, y = avg_rating)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Price vs rating regression
Conclusion: Shows whether higher prices correlate with better ratings.
# Per-restaurant order count and mean rating (missing ratings ignored).
rest_performance <- summarise(
  group_by(df, restaurant_name),
  total_orders = n(),
  avg_rating = mean(rating, na.rm = TRUE)
)
# Scatter of order count against mean rating with a fitted linear trend.
ggplot(rest_performance, aes(x = total_orders, y = avg_rating)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Orders vs rating relationship
Conclusion: Evaluates if popular restaurants maintain higher ratings.
# One row of KPIs per restaurant: total revenue, order count, mean price and
# mean rating.
restaurant_kpis <- summarise(
  group_by(df, restaurant_name),
  Total_Revenue = sum(order_value, na.rm = TRUE),
  Total_Orders = n(),
  Avg_Price = mean(price, na.rm = TRUE),
  Avg_Rating = mean(rating, na.rm = TRUE)
)
# Drop the identifier so only the numeric KPI columns remain.
restaurant_kpis <- select(restaurant_kpis, -restaurant_name)
# KPI creation
Conclusion: Converts raw data into meaningful business metrics.
# Pairwise correlations between the restaurant-level KPI columns. The heatmap
# below needs this matrix, and it was not computed anywhere earlier in the
# script, so melt() failed with "object 'kpi_cor_matrix' not found".
kpi_cor_matrix <- cor(restaurant_kpis, use = "pairwise.complete.obs")
# Reshape the square matrix to long format (Var1, Var2, value) for plotting.
kpi_melt <- melt(kpi_cor_matrix)
# Heatmap of the correlation matrix with each coefficient printed in its tile.
ggplot(kpi_melt, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = round(value, 2))) +
  theme_minimal()
# Heatmap visualization
Conclusion: Helps quickly identify strong positive or negative relationships.
The analysis across all sections highlights important patterns in orders, ratings, and revenue. It identifies top-performing categories and cities, detects variability and outliers, and explores relationships between variables using regression and correlation. Overall, the project demonstrates how structured data analysis can generate useful business insights.