Zomato is an Indian restaurant search and discovery service founded in 2008 by Deepinder Goyal and Pankaj Chaddah. It currently operates in 23 countries, including Australia and the United States. It provides information and reviews on restaurants, including images of menus where the restaurant does not have its own website. (Source: Zomato's Wikipedia article.)
library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1.9000 ✔ purrr 0.2.4
## ✔ tibble 1.4.2 ✔ dplyr 0.7.4
## ✔ tidyr 0.8.0 ✔ stringr 1.3.0
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::vars() masks ggplot2::vars()
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(ggmap)
## Google Maps API Terms of Service: http://developers.google.com/maps/terms.
## Please cite ggmap if you use it: see citation("ggmap") for details.
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
# Download the Zomato restaurant table and keep it as a plain data.frame
# rather than a data.table.
zomato <- fread(
  input = "https://s3-ap-southeast-2.amazonaws.com/koki25ando/zomato.csv",
  data.table = FALSE
)
# Rename columns 3, 18 and 20 to the names the rest of the analysis refers to.
names(zomato)[c(3, 18, 20)] <- c(
  "Country.Code",
  "Aggregate.rating",
  "Rating.text"
)
# Country outlines as a data frame of polygon vertices (long, lat, group,
# region), used both as the layer data and as the map definition.
world.map <- map_data("world")
ggplot() +
  # geom_map() only takes `map_id`; the outline coordinates are read from
  # `map`, so x/y/group aesthetics are dropped with a warning if supplied.
  geom_map(
    data = world.map, map = world.map,
    aes(map_id = region),
    fill = "white", colour = "black"
  ) +
  # geom_map() does not train the axes on its own; widen them to the full
  # extent of the map so the outlines are not clipped to the point cloud.
  expand_limits(x = world.map$long, y = world.map$lat) +
  # One translucent red dot per restaurant in the dataset.
  geom_point(
    data = zomato, aes(x = Longitude, y = Latitude),
    colour = "red", alpha = .3
  ) +
  labs(title = "World Map of Food Restaurants from zomato dataset")
## Warning: Ignoring unknown aesthetics: x, y
It seems that a large share of the data points are from India. Thus, in this report, I am going to focus on restaurants in India.
# Keep only the rows with country code 1, which this report treats as India.
india.zomato <- filter(zomato, Country.Code == 1)
# Look up coordinates for "india" through ggmap's geocoding service.
india.map <- geocode(location = "india")
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=india
# Pack the geocoded position into the named lon/lat vector passed to get_map().
india.coor <- with(india.map, c(lon = lon, lat = lat))
# Fetch a base map image centred on that position at zoom level 5.
india.ggmap <- get_map(location = india.coor, zoom = 5, scale = 1)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=20.593684,78.96288&zoom=5&size=640x640&scale=1&maptype=terrain&language=en-EN
# Draw every Indian restaurant on the base map as a small translucent red dot.
ggmap(
  india.ggmap,
  base_layer = ggplot(india.zomato, aes(Longitude, Latitude))
) +
  geom_point(size = .3, alpha = .3, colour = "red") +
  ggtitle("India's zomato data visualization")
## Warning: Removed 497 rows containing missing values (geom_point).
# Number of Indian restaurants recorded for each city.
india.zomato[["City"]] %>% table()
## .
## Agra Ahmedabad Allahabad Amritsar Aurangabad
## 20 21 20 21 20
## Bangalore Bhopal Bhubaneshwar Chandigarh Chennai
## 20 20 21 18 20
## Coimbatore Dehradun Faridabad Ghaziabad Goa
## 20 20 251 25 20
## Gurgaon Guwahati Hyderabad Indore Jaipur
## 1118 21 18 20 20
## Kanpur Kochi Kolkata Lucknow Ludhiana
## 20 20 20 21 20
## Mangalore Mohali Mumbai Mysore Nagpur
## 20 1 20 20 20
## Nashik New Delhi Noida Panchkula Patna
## 20 5473 1080 1 20
## Puducherry Pune Ranchi Secunderabad Surat
## 20 20 20 2 20
## Vadodara Varanasi Vizag
## 20 20 20
New Delhi has by far the most restaurants of any Indian city in the dataset.
# Restrict the Indian data to restaurants whose city is New Delhi.
nd.zomato <- filter(india.zomato, City == "New Delhi")
# Look up coordinates for "new delhi" through ggmap's geocoding service.
new.delhi <- geocode(location = "new delhi")
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=new%20delhi
# Pack the geocoded position into the named lon/lat vector passed to get_map().
nd.coor <- with(new.delhi, c(lon = lon, lat = lat))
# Fetch a city-scale base map (zoom level 12) centred on that position.
nd.map <- get_map(location = nd.coor, zoom = 12, scale = 1)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=28.613939,77.209021&zoom=12&size=640x640&scale=1&maptype=terrain&language=en-EN
# New Delhi restaurants on the city map, coloured on a yellow-to-red gradient
# by their aggregate rating.
ggmap(nd.map) +
  geom_point(
    mapping = aes(x = Longitude, y = Latitude, colour = Aggregate.rating),
    data = nd.zomato,
    size = 1, alpha = .6
  ) +
  scale_colour_gradient(name = "Rating Score", low = "yellow", high = "red") +
  ggtitle("Restaurants in New Delhi")
## Warning: Removed 950 rows containing missing values (geom_point).
This was my first time using the ggmap package, which gives us access to Google Maps images.
Restaurants are scattered throughout the city.
# Distribution of aggregate rating scores across the New Delhi restaurants.
ggplot(data = nd.zomato, mapping = aes(x = Aggregate.rating)) +
  geom_histogram() +
  xlab("Rating Score")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Drop restaurants whose aggregate rating is exactly 0.
nd.zomato <- filter(nd.zomato, Aggregate.rating != 0)
Removed uninformative records, in this case restaurants with a rating score of 0.
# Mean rating of the remaining New Delhi restaurants; reused below for both
# the reference line and its label (and by later filtering steps).
mean.nd.restaurants <- mean(nd.zomato$Aggregate.rating)
ggplot(nd.zomato) +
  geom_histogram(aes(Aggregate.rating)) +
  # Dotted vertical line at the mean rating.
  geom_vline(
    xintercept = mean.nd.restaurants,
    linetype = "dotted", color = "blue", size = 1.5
  ) +
  # annotate() draws the label exactly once. A geom_text() layer would inherit
  # nd.zomato and overprint the same label once per restaurant. The stray
  # `text = element_text(size = 11)` argument is dropped: it is not a text-geom
  # parameter and was ignored with a warning.
  annotate(
    "text",
    x = mean.nd.restaurants, y = 200,
    label = paste("Mean", ":", round(mean.nd.restaurants, 3)),
    colour = "red", vjust = 3, family = "Times New Roman"
  ) +
  labs(title = "Rating score of food restaurants in New Delhi", x = "Rating Score")
## Warning: Ignoring unknown parameters: text
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Plotting the mean rating of the restaurants.
# Keep the restaurants rated strictly above the New Delhi mean.
top.nd.zomato <- filter(nd.zomato, Aggregate.rating > mean.nd.restaurants)
# Map them, coloured on a yellow-to-red gradient by aggregate rating.
ggmap(nd.map) +
  geom_point(
    mapping = aes(x = Longitude, y = Latitude, colour = Aggregate.rating),
    data = top.nd.zomato,
    size = 1, alpha = .6
  ) +
  scale_colour_gradient(name = "Rating Score", low = "yellow", high = "red") +
  ggtitle("Popular Restaurants in New Delhi")
## Warning: Removed 161 rows containing missing values (geom_point).
# Cuisine strings of the above-average restaurants.
food.type <- top.nd.zomato[["Cuisines"]]
# Flag the restaurants whose cuisine string mentions "Japanese".
japanese.restaurant <- str_detect(string = food.type, pattern = "Japanese")
# Number of flagged restaurants.
nrow(top.nd.zomato[japanese.restaurant, ])
## [1] 38
Unfortunately, only 38 restaurants offer Japanese food. The answer is no: Japanese food is not popular in New Delhi.
# Split each comma-separated cuisine string into individual cuisine names.
# The pieces after the first keep a leading space (" Chinese"), so trim them;
# otherwise the same cuisine is tallied under two different labels.
food.category <- food.type %>%
  str_split(",", simplify = TRUE) %>%
  str_trim()
# simplify = TRUE pads shorter rows with "", so drop those cells explicitly
# instead of relying on "" sorting first in the frequency table.
food.category <- food.category[nzchar(food.category)]
# Frequency table with columns `food.category` and `Freq`.
food.category <- data.frame(table(food.category))
# Bar chart of the 15 most frequent cuisines (more if tied), most frequent first.
food.category %>%
  arrange(desc(Freq)) %>%
  top_n(15, Freq) %>%
  ggplot(aes(x = reorder(food.category, desc(Freq)), y = Freq)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(title = "Popular Food in New Delhi", x = "Food Category")
## Selecting by Freq
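Any duplicated categories in the chart come from splitting on a bare comma, which leaves a leading space on every cuisine after the first (" Chinese" vs. "Chinese"). Even so, it is apparent that North Indian, Chinese and Italian restaurants are popular in the city.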
Thus, in this article, I’m going to focus on them.
# TRUE for restaurants serving at least one of the three most common cuisines.
# A single alternation regex tests every row against all three names. Passing
# the names as a length-3 pattern vector would recycle them across the rows,
# so each row would be checked against only one cuisine.
pop.food <- str_detect(top.nd.zomato$Cuisines, "North Indian|Chinese|Italian")
## Warning in stri_detect_regex(string, pattern, opts_regex = opts(pattern)):
## longer object length is not a multiple of shorter object length
# Keep the above-average restaurants flagged in pop.food.
pop.top.nd.zomato <- top.nd.zomato[pop.food, ]
# Map them, coloured by their textual rating.
ggmap(nd.map) +
  geom_point(
    mapping = aes(x = Longitude, y = Latitude, colour = Rating.text),
    data = pop.top.nd.zomato
  ) +
  scale_colour_discrete(name = "Rating") +
  ggtitle("Popular food restaurants in New Delhi")
## Warning: Removed 47 rows containing missing values (geom_point).
# Average rating per locality, highest first, limited to the first ten rows.
top10.pop.top.nd.zomato <- pop.top.nd.zomato %>%
  group_by(Locality) %>%
  summarise(Average.rating = mean(Aggregate.rating)) %>%
  arrange(desc(Average.rating)) %>%
  head(n = 10)
# One bar per locality; the x labels are rotated to stay readable.
ggplot(
  top10.pop.top.nd.zomato,
  aes(x = as.factor(Locality), y = Average.rating)
) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 70, vjust = .5)) +
  labs(title = "Top 10 rating Suburbs", x = "Suburb Name")
Extracted the top 10 suburbs by average rating.
# Names of the ten top-rated localities.
suburb <- top10.pop.top.nd.zomato[["Locality"]]
# Map the restaurants in those localities: colour identifies the locality and
# point size carries the locality's mean rating.
ggmap(nd.map) +
  geom_point(
    mapping = aes(
      x = Longitude, y = Latitude,
      size = average.Rate, colour = Locality
    ),
    data = pop.top.nd.zomato %>%
      filter(Locality %in% suburb) %>%
      group_by(Locality) %>%
      mutate(average.Rate = mean(Aggregate.rating))
  ) +
  scale_colour_discrete(name = "Suburb Name") +
  ggtitle("Map visualization of Top 10 Suburb's Rating score")
## Warning: Removed 1 rows containing missing values (geom_point).