Load Required Libraries

library(dplyr)
library(tidyr)
library(knitr)

Load Dataset

# Read the raw CSV and pass it straight through janitor::clean_names() so the
# column names come out in snake_case, then preview the first rows.
# Note: the `city` values are read with a leading space (e.g. " Hyderabad").
data <- read.csv("data.csv", stringsAsFactors = FALSE) %>%
  janitor::clean_names()
head(data)
##   restaurant_name dining_rating delivery_rating dining_votes delivery_votes
## 1      Doner King           3.9             4.2           39              0
## 2      Doner King           3.9             4.2           39              0
## 3      Doner King           3.9             4.2           39              0
## 4      Doner King           3.9             4.2           39              0
## 5      Doner King           3.9             4.2           39              0
## 6      Doner King           3.9             4.2           39              0
##     cuisine place_name       city                         item_name best_seller
## 1 Fast Food   Malakpet  Hyderabad               Platter Kebab Combo  BESTSELLER
## 2 Fast Food   Malakpet  Hyderabad           Chicken Rumali Shawarma  BESTSELLER
## 3 Fast Food   Malakpet  Hyderabad            Chicken Tandoori Salad            
## 4 Fast Food   Malakpet  Hyderabad                 Chicken BBQ Salad  BESTSELLER
## 5 Fast Food   Malakpet  Hyderabad          Special Doner Wrap Combo    MUST TRY
## 6 Fast Food   Malakpet  Hyderabad Chicken Tandoori Pizza [8 inches]  BESTSELLER
##   votes prices
## 1    84    249
## 2    45    129
## 3    39    189
## 4    43    189
## 5    31    205
## 6    48    199

Level 1: Understanding the Data (Basic Exploration)


Question 1.1: What is the structure of the dataset (number of rows, columns, and data types)?

# Compact overview: number of rows and columns plus each column's type.
utils::str(data)
## 'data.frame':    123657 obs. of  12 variables:
##  $ restaurant_name: chr  "Doner King" "Doner King" "Doner King" "Doner King" ...
##  $ dining_rating  : num  3.9 3.9 3.9 3.9 3.9 3.9 3.9 3.9 3.9 3.9 ...
##  $ delivery_rating: num  4.2 4.2 4.2 4.2 4.2 4.2 4.2 4.2 4.2 4.2 ...
##  $ dining_votes   : int  39 39 39 39 39 39 39 39 39 39 ...
##  $ delivery_votes : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ cuisine        : chr  "Fast Food" "Fast Food" "Fast Food" "Fast Food" ...
##  $ place_name     : chr  "Malakpet" "Malakpet" "Malakpet" "Malakpet" ...
##  $ city           : chr  " Hyderabad" " Hyderabad" " Hyderabad" " Hyderabad" ...
##  $ item_name      : chr  "Platter Kebab Combo" "Chicken Rumali Shawarma" "Chicken Tandoori Salad" "Chicken BBQ Salad" ...
##  $ best_seller    : chr  "BESTSELLER" "BESTSELLER" "" "BESTSELLER" ...
##  $ votes          : int  84 45 39 43 31 48 27 59 29 31 ...
##  $ prices         : num  249 129 189 189 205 199 165 165 115 129 ...

Question 1.2: Are there any missing values in the dataset?

# Count NA entries per column. Only true NA values are counted; empty strings
# (such as the blank entries in `best_seller`) are not treated as missing.
data %>%
  is.na() %>%
  colSums()
## restaurant_name   dining_rating delivery_rating    dining_votes  delivery_votes 
##               0           32236            1280               0               0 
##         cuisine      place_name            city       item_name     best_seller 
##               0               0               0               0               0 
##           votes          prices 
##               0               0

Question 1.3: What is the total number of unique restaurants, cities, and cuisines in the dataset?

# Number of distinct restaurant names.
n_distinct(data$restaurant_name)
## [1] 826
# Number of distinct cities.
n_distinct(data$city)
## [1] 17
# Number of distinct cuisines.
n_distinct(data$cuisine)
## [1] 48

Question 1.4: Which cuisines have the highest average dining ratings?

# Mean dining rating per cuisine (NA ratings ignored), best-rated first.
data %>%
  group_by(cuisine) %>%
  summarise(
    avg_rating = mean(dining_rating, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_rating))
## # A tibble: 48 × 2
##    cuisine  avg_rating
##    <chr>         <dbl>
##  1 Awadhi         4.5 
##  2 Mexican        4.4 
##  3 Wraps          4.3 
##  4 Andhra         4.2 
##  5 Turkish        4.1 
##  6 Bakery         4.09
##  7 Seafood        4.05
##  8 Shake          3.98
##  9 Pasta          3.93
## 10 Desserts       3.90
## # ℹ 38 more rows

Question 1.5: What is the average dining rating and delivery rating across all restaurants?

# Overall mean of both rating columns, ignoring NA values. The means are taken
# over every row of `data` (one row per menu item), so a restaurant with more
# items contributes more rows to the average.
data %>%
  select(dining_rating, delivery_rating) %>%
  colMeans(na.rm = TRUE)
##   dining_rating delivery_rating 
##        3.822264        3.963184

Level 2: Data Extraction & Filtering


Question 2.1: Which restaurants have high dining ratings but low customer engagement?

# Restaurants with a dining rating of 4.5 or higher but fewer than 50 dining
# votes. `data` holds one row per menu item, so only the first row of each
# restaurant is kept. The grouping is dropped at the end so that later
# operations on `low_engagement` are not silently applied per restaurant.
low_engagement <- data %>%
  filter(dining_rating >= 4.5 & dining_votes < 50) %>%
  group_by(restaurant_name) %>%
  slice(1) %>%
  ungroup()
head(low_engagement)
## # A tibble: 6 × 12
##   restaurant_name      dining_rating delivery_rating dining_votes delivery_votes
##   <chr>                        <dbl>           <dbl>        <int>          <int>
## 1 AB's - Absolute Bar…           4.7             3.7            0              0
## 2 Brik Oven                      4.6             3.9            0              0
## 3 Chaitanya                      4.5             4.4            0              0
## 4 Chili's Grill & Bar            4.5             4.1            0              0
## 5 Dastarkhwan                    4.5             4              0              0
## 6 Exotica                        4.6             4.3            0              0
## # ℹ 7 more variables: cuisine <chr>, place_name <chr>, city <chr>,
## #   item_name <chr>, best_seller <chr>, votes <int>, prices <dbl>

Question 2.2: Identify restaurants where delivery ratings are higher than dining ratings.

# Restaurants whose delivery rating beats their dining rating. Rows where
# either rating is NA are dropped by filter(). Only the first matching row of
# each restaurant name is kept, in the order the rows appear in `data`.
delivery <- data %>%
  filter(dining_rating < delivery_rating) %>%
  filter(!duplicated(restaurant_name))

head(delivery)
##          restaurant_name dining_rating delivery_rating dining_votes
## 1             Doner King           3.9             4.2           39
## 2              BrownBear           3.6             4.0          239
## 3 The Thickshake Factory           3.4             3.8           38
## 4             McDonald's           3.2             3.9          137
## 5      Mughal Restaurant           3.8             4.1          258
## 6     Tipsy Topsy Bakery           3.8             3.9          225
##   delivery_votes   cuisine     place_name       city
## 1              0 Fast Food       Malakpet  Hyderabad
## 2              0 Fast Food Himayath Nagar  Hyderabad
## 3              0 Beverages Himayath Nagar  Hyderabad
## 4              0 Fast Food       MPM Mall  Hyderabad
## 5              0  Desserts     Lakdikapul  Hyderabad
## 6              0   Chinese   Saroor Nagar  Hyderabad
##                                                     item_name best_seller votes
## 1                                         Platter Kebab Combo  BESTSELLER    84
## 2                                              Pineapple Cake                 0
## 3                               Belgian Chocolate Thick Shake                22
## 4 Big Spicy Paneer Wrap + Coke + Fries (M) + Veg Pizza McPuff                 0
## 5                                             Chicken Biryani  BESTSELLER   245
## 6                                        Chicken Soft Noodles  BESTSELLER   401
##   prices
## 1 249.00
## 2 500.00
## 3 241.00
## 4 367.86
## 5 229.00
## 6 165.00

Question 2.4: Identify expensive items (top 10% by price) across all restaurants.

# 90th percentile of item prices (NA prices ignored); this is the cut-off for
# the "expensive" tier.
threshold <- quantile(data[["prices"]], probs = 0.90, na.rm = TRUE)

# Keep the items priced strictly above the cut-off.
expensive_items <- filter(data, prices > threshold)

head(expensive_items)
##   restaurant_name dining_rating delivery_rating dining_votes delivery_votes
## 1       Taco Bell           4.3             3.7          117              0
## 2       BrownBear           3.6             4.0          239              0
## 3       BrownBear           3.6             4.0          239              0
## 4       BrownBear           3.6             4.0          239              0
## 5       BrownBear           3.6             4.0          239              0
## 6       BrownBear           3.6             4.0          239              0
##     cuisine             place_name       city              item_name
## 1     Wraps The Next Galleria Mall  Hyderabad Cheese Max Box Non-Veg
## 2 Fast Food         Himayath Nagar  Hyderabad         Pineapple Cake
## 3 Fast Food         Himayath Nagar  Hyderabad      Black Forest Cake
## 4 Fast Food         Himayath Nagar  Hyderabad        Red Velvet Cake
## 5 Fast Food         Himayath Nagar  Hyderabad      Butterscotch Cake
## 6 Fast Food         Himayath Nagar  Hyderabad         Pineapple Cake
##   best_seller votes prices
## 1                 0    449
## 2                 0    500
## 3                 0    550
## 4                 0    700
## 5                 5    550
## 6                 0    500