This is a Restaurant Data Analysis report created in RStudio using R Markdown.
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(knitr)
library(gtools)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, last
library(plotrix)
First of all, let's combine all the datasets that belong to the restaurants.
# Read every CSV used by the report; text columns are kept as character
# vectors rather than being converted to factors.
accepts      <- read.csv(file = "chefmozaccepts.csv", stringsAsFactors = FALSE)
cuisine      <- read.csv(file = "chefmozcuisine.csv", stringsAsFactors = FALSE)
hours        <- read.csv(file = "chefmozhours4.csv", stringsAsFactors = FALSE)
parking      <- read.csv(file = "chefmozparking.csv", stringsAsFactors = FALSE)
places       <- read.csv(file = "geoplaces2.csv", stringsAsFactors = FALSE)
user_cuisine <- read.csv(file = "usercuisine.csv", stringsAsFactors = FALSE)
user_payment <- read.csv(file = "userpayment.csv", stringsAsFactors = FALSE)
rating_bar   <- read.csv(file = "rating_final.csv", stringsAsFactors = FALSE)
user_profile <- read.csv(file = "userprofile.csv", stringsAsFactors = FALSE)
# Return the value that occurs most often in `x`.
# When several values tie, the one that appears first in `x` is returned.
Mode <- function(x) {
  distinct_values <- unique(x)
  # Map each element to the position of its distinct value, then count hits.
  occurrences <- tabulate(match(x, distinct_values))
  distinct_values[which.max(occurrences)]
}
# Return the value that occurs least often in `x`.
# When several values tie, the one that appears first in `x` is returned.
Mode2 <- function(x) {
  distinct_values <- unique(x)
  # Map each element to the position of its distinct value, then count hits.
  occurrences <- tabulate(match(x, distinct_values))
  distinct_values[which.min(occurrences)]
}
#binding data
# Combine the five restaurant tables into a single data frame.
# NOTE(review): smartbind() stacks its inputs row-wise rather than joining
# them on a key such as placeID - confirm this is what is wanted. `rest_all`
# is not referenced again in the visible part of this report.
rest_all <- smartbind(accepts, cuisine, hours, parking, places)
Here we analyse the modes of payment accepted by the restaurants.
# Tally the rows of `accepts` per payment method, draw the tally as a bar
# chart, and print a column summary of the raw table.
# NOTE(review): the rendered table lists "Visa" and "VISA" as separate
# methods; consider normalising the case of Rpayment before tabulating.
count <- table(accepts$Rpayment)
barplot(count, col = "green", las = 2, ylab = "Frequency")
summary(accepts)
## placeID Rpayment
## Min. :132002 Length:1314
## 1st Qu.:132580 Class :character
## Median :132789 Mode :character
## Mean :133219
## 3rd Qu.:133036
## Max. :135110
# Convert the payment tally to a data frame and render it as a table.
# NOTE(review): `m` is reassigned later in the report for the alcohol analysis.
m <- data.frame(count)
kable(m)
| Var1 | Freq |
|---|---|
| American_Express | 153 |
| bank_debit_cards | 130 |
| Carte_Blanche | 7 |
| cash | 500 |
| checks | 10 |
| Diners_Club | 42 |
| Discover | 11 |
| gift_certificates | 7 |
| Japan_Credit_Bureau | 5 |
| MasterCard-Eurocard | 194 |
| Visa | 83 |
| VISA | 172 |
And the minimum frequency of payment is :-
# Smallest count among the payment methods. Mode() applied to the table
# returned the most common count value, which is not the minimum.
min(table(accepts$Rpayment))
## [1] 5
And the maximum frequency of payment is :-
# Largest count among the payment methods. Mode2() applied to the table
# returned a count value that occurs only once, which is not the maximum.
max(table(accepts$Rpayment))
## [1] 500
Here is the analysis of the different types of cuisine served by the restaurants in Mexico :-
# Tally the rows of `cuisine` per cuisine type, plot the tally as a connected
# line of points, and print a column summary of the raw table.
# `count_cuisine` is reused further down for the customer comparison plot.
count_cuisine <- table(cuisine$Rcuisine)
plot(count_cuisine,type = "o", col = "purple", ylab = "Frequency")
summary(cuisine)
## placeID Rcuisine
## Min. :132001 Length:916
## 1st Qu.:132323 Class :character
## Median :132631 Mode :character
## Mean :132897
## 3rd Qu.:132907
## Max. :135110
# Convert the cuisine tally to a data frame and render it as a table.
# NOTE(review): the variable name `c` shadows base::c(); calls such as
# c(1, 2) still resolve to the function, but a distinct name would be clearer.
c <- data.frame(count_cuisine)
kable(c)
| Var1 | Freq |
|---|---|
| Afghan | 1 |
| African | 3 |
| American | 59 |
| Armenian | 5 |
| Asian | 7 |
| Bagels | 1 |
| Bakery | 6 |
| Bar | 32 |
| Bar_Pub_Brewery | 24 |
| Barbecue | 3 |
| Brazilian | 1 |
| Breakfast-Brunch | 3 |
| Burgers | 13 |
| Cafe-Coffee_Shop | 27 |
| Cafeteria | 23 |
| California | 1 |
| Caribbean | 1 |
| Chinese | 21 |
| Contemporary | 9 |
| Continental-European | 4 |
| Deli-Sandwiches | 9 |
| Dessert-Ice_Cream | 3 |
| Diner | 3 |
| Dutch-Belgian | 55 |
| Eastern_European | 2 |
| Ethiopian | 1 |
| Family | 14 |
| Fast_Food | 20 |
| Fine_Dining | 1 |
| French | 31 |
| Game | 2 |
| German | 14 |
| Greek | 33 |
| Hot_Dogs | 7 |
| International | 62 |
| Italian | 42 |
| Japanese | 17 |
| Juice | 6 |
| Korean | 1 |
| Latin_American | 7 |
| Mediterranean | 13 |
| Mexican | 239 |
| Mongolian | 1 |
| Organic-Healthy | 1 |
| Persian | 1 |
| Pizzeria | 25 |
| Polish | 5 |
| Regional | 3 |
| Seafood | 18 |
| Soup | 1 |
| Southern | 1 |
| Southwestern | 3 |
| Spanish | 3 |
| Steaks | 8 |
| Sushi | 6 |
| Thai | 1 |
| Turkish | 1 |
| Vegetarian | 10 |
| Vietnamese | 2 |
The cuisine served most often in Mexican restaurants :-
# Name of the cuisine with the highest count. Mode2() applied to the table
# returned a count value that occurs only once, not the top cuisine.
names(which.max(table(cuisine$Rcuisine)))
## [1] "Mexican"
Lowest number of restaurants serving any one cuisine (several cuisines share this count) :-
# Smallest count among the cuisines. Mode() applied to the table returned the
# most common count value, which matches the minimum only by coincidence.
min(table(cuisine$Rcuisine))
## [1] 1
Now, let's look at the parking facilities of the Mexican restaurants. This analysis shows how many restaurants offer each kind of parking.
# Count the parking records per category and plot the counts, marking and
# labelling the most and least common categories.
parking_count <- table(parking$parking_lot)
plot(parking_count, type = "o", col = "blue", main = "Restaurant Parking Analysis", las = 2, ylab = "Frequency")
# which.max()/which.min() return named positions, so each label is taken from
# the same table entry as its marker and the two cannot disagree on ties.
most_common_parking <- which.max(parking_count)
least_common_parking <- which.min(parking_count)
points(most_common_parking, max(parking_count), pch = 19, col = "red")
points(least_common_parking, min(parking_count), pch = 19, col = "green")
text(x = most_common_parking, y = max(parking_count), labels = names(most_common_parking))
text(x = least_common_parking, y = min(parking_count), labels = names(least_common_parking))
The percentage of restaurants that don’t have parking facilities is :-
# Share of parking records whose category is "none", as a percentage.
percentage <- 100 * sum(parking$parking_lot == "none") / length(parking$parking_lot)
round(percentage, 2)
## [1] 49.57
And the percentage of restaurants that have valet parking, which suggests the best-quality restaurants, is :-
# Share of parking records whose category is "valet parking", as a percentage.
percentage2 <- 100 * sum(parking$parking_lot == "valet parking") / length(parking$parking_lot)
round(percentage2, 2)
## [1] 2.99
# Convert the parking tally to a data frame and render it as a table.
c2 <- data.frame(parking_count)
kable(c2)
| Var1 | Freq |
|---|---|
| fee | 22 |
| none | 348 |
| public | 102 |
| street | 32 |
| valet parking | 21 |
| validated parking | 3 |
| yes | 174 |
Now, let's see which Mexican cities have the most restaurants that do not serve alcohol :-
# Count the restaurants that serve no alcohol per city and plot the counts,
# marking and labelling the cities with the most and the fewest.
m <- data.frame(places$alcohol, places$city)
l <- subset(m, m$places.alcohol == "No_Alcohol_Served")
plotter <- table(l$places.city)
plot(plotter, type = "o", col = "red", main = "Non-Alcohol Serving Restaurants", las = 2, ylab = "Frequency")
# Take the labels from `plotter` itself. They were previously computed from
# the unfiltered frame `m`, so they could name a city that is not the
# extreme among the no-alcohol restaurants.
most_dry_city <- which.max(plotter)
least_dry_city <- which.min(plotter)
points(most_dry_city, max(plotter), pch = 19, col = "blue")
points(least_dry_city, min(plotter), pch = 19, col = "green")
text(x = most_dry_city, y = max(plotter), labels = names(most_dry_city))
text(x = least_dry_city, y = min(plotter), labels = names(least_dry_city))
The number of alcohol-serving restaurants per city is :-
# Count the restaurants that serve wine/beer or have a full bar per city and
# plot the counts, marking and labelling the cities with the most and fewest.
l_bar <- subset(m, (m$places.alcohol == "Wine-Beer" | m$places.alcohol == "Full_Bar"))
plotter_bar <- table(l_bar$places.city)
plot(plotter_bar, type = "o", col = "orange", main = "Alcohol Serving Restaurants", las = 2, ylab ="Frequency")
# Take the labels from `plotter_bar` itself so that each label always belongs
# to the marked table entry, even when several cities tie.
most_bar_city <- which.max(plotter_bar)
least_bar_city <- which.min(plotter_bar)
points(most_bar_city, max(plotter_bar), pch = 19, col = "blue")
points(least_bar_city, min(plotter_bar), pch = 19, col = "green")
text(x = most_bar_city, y = max(plotter_bar), labels = names(most_bar_city))
text(x = least_bar_city, y = min(plotter_bar), labels = names(least_bar_city))
Here’s the analysis of the restaurants’ budgets on the basis of price
# Count the restaurants in each price band and show the split as a pie chart.
budget <- data.frame(places$price, places$city)
# la / lb / lc = number of rows priced "low" / "medium" / "high"; they are
# reused below for the percentage figures.
la <- sum(budget$places.price == "low")
lb <- sum(budget$places.price == "medium")
lc <- sum(budget$places.price == "high")
# NOTE(review): the variable name `all` shadows base::all(); a distinct name
# would be clearer.
all <- c(la,lb,lc)
lbls <- c("low", "medium", "high")
pie(all,lbls, main = "Budget wise restaurant analysis", col=rainbow(length(lbls)))
Percentage of Low price restaurants in Mexico :-
# Low-price restaurants as a percentage of the low, medium and high ones.
100 * la / sum(la, lb, lc)
## [1] 34.61538
Percentage of Medium price restaurants in Mexico :-
# Medium-price restaurants as a percentage of the low, medium and high ones.
100 * lb / sum(la, lb, lc)
## [1] 46.15385
Percentage of High price restaurants in Mexico :-
# High-price restaurants as a percentage of the low, medium and high ones.
100 * lc / sum(la, lb, lc)
## [1] 19.23077
Through this analysis, we can say that Mexico is a Middle Class Economy
Here is a comparison of the cuisines preferred by customers in Mexico with the cuisines served by the restaurants :-
# Compare the cuisines customers prefer with the cuisines the restaurants
# serve, drawn on one shared category axis.
count_cuisine2 <- table(user_cuisine$Rcuisine)
# The two tallies come from different files and need not contain the same
# cuisines. Plotting each against its own index would put unrelated
# categories at the same x position, so both are re-indexed onto the sorted
# union of cuisine names, with 0 where a cuisine is absent.
cuisine_names <- sort(union(names(count_cuisine2), names(count_cuisine)))
align_counts <- function(counts, categories) {
  aligned <- as.vector(counts)[match(categories, names(counts))]
  aligned[is.na(aligned)] <- 0L
  aligned
}
user_counts <- align_counts(count_cuisine2, cuisine_names)
restaurant_counts <- align_counts(count_cuisine, cuisine_names)
x_positions <- seq_along(cuisine_names)
# The y-range covers both series so neither line is clipped.
plot(x_positions, user_counts, type = "o", col = "yellow", xaxt = "n", xlab = "", ylab = "Frequency", ylim = range(0, user_counts, restaurant_counts))
axis(side = 1, at = x_positions, labels = cuisine_names, las = 2, cex.axis = 0.5)
# `ylab` is not passed here: lines() adds to an existing plot and has no axis
# label to set.
lines(x_positions, restaurant_counts, type = "o", col = "purple")
Here, in this graph, the “Yellow” coloured line represents customers’ choice of food and the “Purple” coloured line represents Restaurants’ choice of cuisine serving.
Now let's see the customers' payment preferences. Here we analyse the modes of payment that the customers use.
# Tally the rows of `user_payment` per payment method and draw a bar chart.
costumer_count <- table(user_payment$Upayment)
barplot(costumer_count, col = "dark green", las = 2, ylab = "Frequency")
Now let's analyse the overall rating of restaurants in Mexico.
# Keep the three rating columns, then select the rows that score 2 on every
# one of them.
rating_all <- data.frame(
  rating_bar$rating,
  rating_bar$food_rating,
  rating_bar$service_rating
)
all1 <- subset(
  rating_all,
  rating_bar.rating == 2 & rating_bar.food_rating == 2 & rating_bar.service_rating == 2
)
The pie chart for best restaurants vs all other restaurants :-
# Pie chart of ratings that score 2 on every measure versus all the others.
# `all1` holds only rows whose three scores are all 2, so summing its rating
# column counted each such row twice; count the rows instead. The other slice
# is derived from the data instead of the hard-coded 1161 (presumably the
# total number of rating rows - confirm), so the two slices do not overlap.
best_count <- nrow(all1)
ok <- c(nrow(rating_all) - best_count, best_count)
lbls2 <- c("normal", "best")
pie(ok, lbls2, main = "Restaurants rating", col=rainbow(length(lbls2)))
# Gather the customer-profile columns analysed below; data.frame() names them
# user_profile.ambience, user_profile.interest, and so on.
profile_frame <- data.frame(user_profile$ambience, user_profile$interest, user_profile$religion, user_profile$activity, user_profile$budget)
This plot shows the ambience preferred by the customers :-
# Tally the customers' ambience values, chart them, and render the tally as a
# table.
# NOTE(review): the "?" category in the output presumably marks missing
# answers - confirm and consider excluding it.
p <- table(profile_frame$user_profile.ambience)
barplot(p, col = "grey", las = 2, ylab = "Frequency")
p1 <- data.frame(p)
kable(p1)
| Var1 | Freq |
|---|---|
| ? | 6 |
| family | 70 |
| friends | 46 |
| solitary | 16 |
This plot denotes the area of interest of people coming to restaurants :-
# Tally the customers' interest values, chart them, and render the tally as a
# table.
# NOTE(review): the variable name `q` shadows base::q(); a distinct name
# would be clearer.
q <- table(profile_frame$user_profile.interest)
barplot(q, col = "blue", las = 2, ylab = "Frequency")
q1 <- data.frame(q)
kable(q1)
| Var1 | Freq |
|---|---|
| eco-friendly | 16 |
| none | 30 |
| retro | 6 |
| technology | 36 |
| variety | 50 |
This plot shows the occupation of the people coming to the restaurants :-
# Tally the customers' activity values, chart them, and render the tally as a
# table.
# NOTE(review): the "?" category in the output presumably marks missing
# answers - confirm and consider excluding it.
r <- table(profile_frame$user_profile.activity)
barplot(r, col = "green", las = 2, ylab = "Frequency")
r1 <- data.frame(r)
kable(r1)
| Var1 | Freq |
|---|---|
| ? | 7 |
| professional | 15 |
| student | 113 |
| unemployed | 2 |
| working-class | 1 |
This plot denotes the budget of people that are coming to the restaurants :-
# Tally the customers' budget values, chart them, and render the tally as a
# table.
# NOTE(review): the "?" category in the output presumably marks missing
# answers - confirm and consider excluding it.
s <- table(profile_frame$user_profile.budget)
barplot(s, col = "pink", las = 2, ylab = "Frequency")
s1 <- data.frame(s)
kable(s1)
| Var1 | Freq |
|---|---|
| ? | 7 |
| high | 5 |
| low | 35 |
| medium | 91 |
Name : Konark Yadav
BTech(ECE)
Email : k.yadav2704@gmail.com
Alternate Email : konarkyadav.y12@lnmiit.ac.in