This is a Restaurant Data Analysis report created in RStudio using R Markdown.

library(plyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(knitr)
library(gtools)
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, last
library(plotrix)

First of all, let's combine all the datasets that belong to the restaurants.

# Reading data: load every CSV used in this report. Text columns stay as
# character vectors instead of being converted to factors.

# Tables that are later bound together into rest_all.
accepts <- read.csv(file = "chefmozaccepts.csv", stringsAsFactors = FALSE)
cuisine <- read.csv(file = "chefmozcuisine.csv", stringsAsFactors = FALSE)
hours <- read.csv(file = "chefmozhours4.csv", stringsAsFactors = FALSE)
parking <- read.csv(file = "chefmozparking.csv", stringsAsFactors = FALSE)
places <- read.csv(file = "geoplaces2.csv", stringsAsFactors = FALSE)

# User-side tables and the ratings table.
user_cuisine <- read.csv(file = "usercuisine.csv", stringsAsFactors = FALSE)
user_payment <- read.csv(file = "userpayment.csv", stringsAsFactors = FALSE)
rating_bar <- read.csv(file = "rating_final.csv", stringsAsFactors = FALSE)
user_profile <- read.csv(file = "userprofile.csv", stringsAsFactors = FALSE)

 # Return the most frequent value of `x`.
 #
 # x: a vector (or 1-d table) of values.
 # Returns a single element of `x`. When several values share the highest
 # frequency, the one that appears first in `x` is returned.
 Mode <- function(x) {
     distinct_values <- unique(x)
     # Map every element to its position in `distinct_values`, then count
     # how many elements landed on each position.
     occurrences <- tabulate(match(x, distinct_values))
     distinct_values[which.max(occurrences)]
 }
 
 
  # Return the least frequent value of `x` (the counterpart of Mode()).
  #
  # x: a vector (or 1-d table) of values.
  # Returns a single element of `x`. When several values share the lowest
  # frequency, the one that appears first in `x` is returned.
  Mode2 <- function(x) {
     distinct_values <- unique(x)
     # Count how often each distinct value occurs, in first-appearance order.
     occurrences <- tabulate(match(x, distinct_values))
     distinct_values[which.min(occurrences)]
  }


# Binding data: stack the five restaurant tables into one data frame.
# NOTE(review): smartbind() appends the rows of each table and fills columns
# that a table lacks with NA; it does not match records on placeID, so the
# result is not a key-based join. rest_all is not referenced again in the
# visible part of this report -- confirm whether a merge by placeID was intended.
rest_all <- smartbind(accepts, cuisine, hours, parking, places)

Modes of Payment in Restaurants Analysis

Here we analyse the modes of payment that are accepted by the restaurants.

# Number of restaurant records per accepted payment type, shown as a bar chart.
# NOTE(review): the rendered table lists both "Visa" and "VISA"; the two labels
# differ only by case and are counted as separate payment types.
count <- table(accepts$Rpayment)
barplot(height = count, las = 2, col = "green", ylab = "Frequency")

# Column-wise overview of the raw payment table.
summary(object = accepts)
##     placeID         Rpayment        
##  Min.   :132002   Length:1314       
##  1st Qu.:132580   Class :character  
##  Median :132789   Mode  :character  
##  Mean   :133219                     
##  3rd Qu.:133036                     
##  Max.   :135110

Different types of payment options in Restaurants

# Turn the payment counts into a two-column data frame (Var1, Freq) and
# render it as a table.
m <- as.data.frame(count)
kable(x = m)
Var1 Freq
American_Express 153
bank_debit_cards 130
Carte_Blanche 7
cash 500
checks 10
Diners_Club 42
Discover 11
gift_certificates 7
Japan_Credit_Bureau 5
MasterCard-Eurocard 194
Visa 83
VISA 172

And the minimum frequency of payment is :-

# Smallest count among the accepted payment types. min() is used because
# Mode() applied to the counts returns the count value that occurs most often,
# which is not the minimum.
min(table(accepts$Rpayment))
## [1] 5

And the maximum frequency of payment is :-

# Largest count among the accepted payment types. max() is used because
# Mode2() applied to the counts returns the count value that occurs least
# often, which is not the maximum.
max(table(accepts$Rpayment))
## [1] 500

Cuisines Analysis

Here is the analysis of the different types of cuisines that are served by the restaurants in Mexico :-

# Number of restaurant records per cuisine, drawn as a connected line plot.
count_cuisine <- table(cuisine$Rcuisine)
plot(
  count_cuisine,
  type = "o",
  col = "purple",
  ylab = "Frequency"
)

# Column-wise overview of the raw cuisine table.
summary(object = cuisine)
##     placeID         Rcuisine        
##  Min.   :132001   Length:916        
##  1st Qu.:132323   Class :character  
##  Median :132631   Mode  :character  
##  Mean   :132897                     
##  3rd Qu.:132907                     
##  Max.   :135110

Different types of cuisines served in Restaurants

# Turn the cuisine counts into a two-column data frame (Var1, Freq) and
# render it as a table.
c <- as.data.frame(count_cuisine)
kable(x = c)
Var1 Freq
Afghan 1
African 3
American 59
Armenian 5
Asian 7
Bagels 1
Bakery 6
Bar 32
Bar_Pub_Brewery 24
Barbecue 3
Brazilian 1
Breakfast-Brunch 3
Burgers 13
Cafe-Coffee_Shop 27
Cafeteria 23
California 1
Caribbean 1
Chinese 21
Contemporary 9
Continental-European 4
Deli-Sandwiches 9
Dessert-Ice_Cream 3
Diner 3
Dutch-Belgian 55
Eastern_European 2
Ethiopian 1
Family 14
Fast_Food 20
Fine_Dining 1
French 31
Game 2
German 14
Greek 33
Hot_Dogs 7
International 62
Italian 42
Japanese 17
Juice 6
Korean 1
Latin_American 7
Mediterranean 13
Mexican 239
Mongolian 1
Organic-Healthy 1
Persian 1
Pizzeria 25
Polish 5
Regional 3
Seafood 18
Soup 1
Southern 1
Southwestern 3
Spanish 3
Steaks 8
Sushi 6
Thai 1
Turkish 1
Vegetarian 10
Vietnamese 2

Cuisine that is served by the most restaurants in Mexico :-

# Name of the cuisine with the highest count. which.max() keeps the table's
# names, so names() yields the cuisine itself rather than a count.
names(which.max(count_cuisine))
## [1] "Mexican"

Cuisines that are served by the fewest restaurants in Mexico :-

# Names of every cuisine that shares the lowest count. Several cuisines are
# tied at the minimum, so all of them are listed instead of a single value.
paste(names(count_cuisine)[count_cuisine == min(count_cuisine)], collapse = ", ")
## [1] "Afghan, Bagels, Brazilian, California, Caribbean, Ethiopian, Fine_Dining, Korean, Mongolian, Organic-Healthy, Persian, Soup, Southern, Thai, Turkish"

Now, let's look at the parking facilities of the Mexican restaurants. This analysis shows how many restaurants offer each kind of parking.

# Tabulate the parking options and plot the counts as a connected line.
parking_count <- table(parking$parking_lot)
plot(
  parking_count,
  type = "o", col = "blue", las = 2,
  main = "Restaurant Parking Analysis", ylab = "Frequency"
)

# Mark the highest count in red and the lowest count in green.
points(
  x = c(which.max(parking_count), which.min(parking_count)),
  y = c(max(parking_count), min(parking_count)),
  pch = 19, col = c("red", "green")
)

# Label both marked points; the label text comes from Mode() / Mode2() applied
# to the raw parking column.
text(
  x = c(which.max(parking_count), which.min(parking_count)),
  y = c(max(parking_count), min(parking_count)),
  labels = c(Mode(parking$parking_lot), Mode2(parking$parking_lot))
)

The percentage of restaurants that don’t have parking facilities is :-

# Percentage of parking records whose parking_lot value is "none",
# rounded to two decimals for display.
percentage <- 100 * sum(parking$parking_lot == "none") /
  sum(table(parking$parking_lot))
round(percentage, digits = 2)
## [1] 49.57

And the percentage of restaurants that have valet parking, which is taken here as a sign of the best-quality restaurants, is :-

# Percentage of parking records whose parking_lot value is "valet parking",
# rounded to two decimals for display.
percentage2 <- 100 * sum(parking$parking_lot == "valet parking") /
  sum(table(parking$parking_lot))
round(percentage2, digits = 2)
## [1] 2.99

Different types of parking options in Restaurants

# Turn the parking counts into a two-column data frame (Var1, Freq) and
# render it as a table.
c2 <- as.data.frame(parking_count)
kable(x = c2)
Var1 Freq
fee 22
none 348
public 102
street 32
valet parking 21
validated parking 3
yes 174

Now, let's see which Mexican cities have the largest number of restaurants that do not serve alcohol :-

# Pair each restaurant's alcohol policy with its city.
m <- data.frame(places$alcohol, places$city)

# Keep only the restaurants that serve no alcohol and count them per city.
l <- subset(m, m$places.alcohol == "No_Alcohol_Served")
plotter <- table(l$places.city)

plot(plotter, type = "o", col = "red", main = "Non-Alcohol Serving Restaurants", las = 2, ylab = "Frequency")

# Mark the city with the highest count in blue and the lowest in green.
points(which.max(plotter), max(plotter), pch = 19, col = "blue")
points(which.min(plotter), min(plotter), pch = 19, col = "green")

# Take the label text from the plotted table itself so each label names the
# city at the marked position. Labels derived from `m` would describe all
# restaurants, not the no-alcohol subset shown in this plot.
text(x = which.max(plotter), y = max(plotter), labels = names(which.max(plotter)))
text(x = which.min(plotter), y = min(plotter), labels = names(which.min(plotter)))

The number of alcohol-serving restaurants in each city is :-

# Keep the restaurants that serve alcohol (wine/beer or a full bar) and count
# them per city.
l_bar <- subset(m, m$places.alcohol %in% c("Wine-Beer", "Full_Bar"))
plotter_bar <- table(l_bar$places.city)
plot(plotter_bar, type = "o", col = "orange", main = "Alcohol Serving Restaurants", las = 2, ylab = "Frequency")

# Mark the city with the highest count in blue and the lowest in green.
points(which.max(plotter_bar), max(plotter_bar), pch = 19, col = "blue")
points(which.min(plotter_bar), min(plotter_bar), pch = 19, col = "green")

# Take the label text from the plotted table so that, when several cities tie,
# the label names the same city whose position is marked (which.max/which.min
# pick the first tied entry in table order).
text(x = which.max(plotter_bar), y = max(plotter_bar), labels = names(which.max(plotter_bar)))
text(x = which.min(plotter_bar), y = min(plotter_bar), labels = names(which.min(plotter_bar)))

Here’s the analysis of the restaurants’ budgets on the basis of price

# Pair each restaurant's price level with its city.
budget <- data.frame(
  places$price,
  places$city
)

# Number of restaurants in each price level.
la <- sum(budget$places.price == "low")
lb <- sum(budget$places.price == "medium")
lc <- sum(budget$places.price == "high")

# Pie chart of the three price levels; slice order matches the label order.
lbls <- c("low", "medium", "high")
all <- c(la, lb, lc)
pie(
  x = all,
  labels = lbls,
  col = rainbow(length(lbls)),
  main = "Budget wise restaurant analysis"
)

Percentage of Low price restaurants in Mexico :-

# Share of low-priced restaurants among the three price levels, in percent.
100 * la / sum(la, lb, lc)
## [1] 34.61538

Percentage of Medium price restaurants in Mexico :-

# Share of medium-priced restaurants among the three price levels, in percent.
100 * lb / sum(la, lb, lc)
## [1] 46.15385

Percentage of High price restaurants in Mexico :-

# Share of high-priced restaurants among the three price levels, in percent.
100 * lc / sum(la, lb, lc)
## [1] 19.23077

Through this analysis, we can say that Mexico is a Middle Class Economy

Customer favourites Cuisines Analysis

Here is the analysis of different types of cuisines that are mainly consumed by the people of Mexico vs the cuisines served by the restaurants :-

# Compare how often each cuisine is named by customers with how often it is
# offered by restaurants.
count_cuisine2 <- table(user_cuisine$Rcuisine)

# Both series are drawn by position on the x-axis, and the two tables are not
# guaranteed to contain the same cuisines in the same order. Re-tabulate both
# over one shared, sorted set of cuisine names so that a given x position means
# the same cuisine for both lines (cuisines absent from one side count as 0).
cuisine_levels <- sort(unique(c(user_cuisine$Rcuisine, cuisine$Rcuisine)))
user_counts <- table(factor(user_cuisine$Rcuisine, levels = cuisine_levels))
restaurant_counts <- table(factor(cuisine$Rcuisine, levels = cuisine_levels))

# Size the y-axis for both series so that neither line is clipped.
plot(user_counts, type = "o", col = "yellow", ylab = "Frequency",
     ylim = range(0, user_counts, restaurant_counts))
lines(restaurant_counts, type = "o", col = "purple")

Here, in this graph, the “Yellow” coloured line represents customers’ choice of food and the “Purple” coloured line represents Restaurants’ choice of cuisine serving.

Modes of Payment Used by Customers Analysis

Now let's see what the customers' preferences for making payments are. Here we analyse the modes of payment that the customers use.

# Number of user records per payment method, shown as a bar chart.
costumer_count <- table(user_payment$Upayment)
barplot(
  height = costumer_count,
  las = 2,
  col = "dark green",
  ylab = "Frequency"
)

Restaurant Final Rating

Now let's analyse the overall rating of restaurants in Mexico.

# Collect the three rating columns (overall, food, service) in one data frame.
rating_all <- data.frame(
  rating_bar$rating,
  rating_bar$food_rating,
  rating_bar$service_rating
)

# Keep the rows where all three ratings equal 2. which() drops rows whose
# condition is NA, the same way subset() does.
# NOTE(review): the service condition reads from rating_bar rather than
# rating_all; both hold the same column, so the selected rows are the same.
all1 <- rating_all[which(
  rating_all$rating_bar.rating == 2 &
    rating_all$rating_bar.food_rating == 2 &
    rating_bar$service_rating == 2
), , drop = FALSE]

The pie chart for best restaurants vs all other restaurants :-

# Pie chart of the "best" ratings (all three scores equal 2) against every
# other rating.
# Count rows instead of summing the rating column: every row kept in all1 has
# rating 2, so a sum would report twice the number of rows. The size of the
# "normal" slice is derived from the data rather than a fixed literal, so it
# follows the input file and excludes the rows already counted as "best".
best_n <- nrow(all1)
ok <- c(nrow(rating_all) - best_n, best_n)
lbls2 <- c("normal", "best")

pie(ok, lbls2, main = "Restaurants rating", col = rainbow(length(lbls2)))

User Profiles

# Collect the user-profile columns analysed below; data.frame() names each
# column "user_profile.<column>".
profile_frame <- data.frame(
  user_profile$ambience,
  user_profile$interest,
  user_profile$religion,
  user_profile$activity,
  user_profile$budget
)

This plot shows the ambience (family, friends or solitary) recorded in the user profiles :-

 # Count the user profiles per ambience value and draw the counts as bars.
 p <- table(profile_frame[["user_profile.ambience"]])
 barplot(height = p, las = 2, col = "grey", ylab = "Frequency")

 # Same counts as a two-column table (Var1, Freq).
 p1 <- as.data.frame(p)
 kable(x = p1)
Var1 Freq
? 6
family 70
friends 46
solitary 16

This plot denotes the area of interest of people coming to restaurants :-

# Count the user profiles per interest value and draw the counts as bars.
q <- table(profile_frame[["user_profile.interest"]])
barplot(height = q, las = 2, col = "blue", ylab = "Frequency")

# Same counts as a two-column table (Var1, Freq).
q1 <- as.data.frame(q)
kable(x = q1)
Var1 Freq
eco-friendly 16
none 30
retro 6
technology 36
variety 50

This plot shows the activity (occupation) of the people who come to the restaurants :-

# Count the user profiles per activity value and draw the counts as bars.
r <- table(profile_frame[["user_profile.activity"]])
barplot(height = r, las = 2, col = "green", ylab = "Frequency")

# Same counts as a two-column table (Var1, Freq).
r1 <- as.data.frame(r)
kable(x = r1)
Var1 Freq
? 7
professional 15
student 113
unemployed 2
working-class 1

This plot denotes the budget of people that are coming to the restaurants :-

# Count the user profiles per budget value and draw the counts as bars.
s <- table(profile_frame[["user_profile.budget"]])
barplot(height = s, las = 2, col = "pink", ylab = "Frequency")

# Same counts as a two-column table (Var1, Freq).
s1 <- as.data.frame(s)
kable(x = s1)
Var1 Freq
? 7
high 5
low 35
medium 91

Name : Konark Yadav

BTech(ECE)

Email : k.yadav2704@gmail.com

Alternate Email : konarkyadav.y12@lnmiit.ac.in