1. Loading Libraries and Data
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# Read the Zomato restaurant CSV; text columns are kept as plain character
# vectors rather than being converted to factors on import.
data <- read.csv(
  file = "INDIA ZOMATO RESTURAUNT ANALYSIS.csv",
  stringsAsFactors = FALSE
)
# Show each column's type together with its first few values.
str(data)
## 'data.frame': 8652 obs. of 21 variables:
## $ Restaurant.ID : int 3400025 3400341 3400005 3400021 3400017 3400325 3400059 3400060 3400348 3400072 ...
## $ Restaurant.Name : chr "Jahanpanah" "Rangrezz Restaurant" "Time2Eat - Mama Chicken" "Chokho Jeeman Marwari Jain Bhojanalya" ...
## $ Country.Code : int 1 1 1 1 1 1 1 1 1 1 ...
## $ City : chr "Agra" "Agra" "Agra" "Agra" ...
## $ Address : chr "E 23, Shopping Arcade, Sadar Bazaar, Agra Cantt, Agra" "E-20, Shopping Arcade, Sadar Bazaar, Agra Cantt, Agra" "Main Market, Sadar Bazaar, Agra Cantt, Agra" "1/48, Delhi Gate, Station Road, Raja Mandi, Civil Lines, Agra" ...
## $ Locality : chr "Agra Cantt" "Agra Cantt" "Agra Cantt" "Civil Lines" ...
## $ Locality.Verbose : chr "Agra Cantt, Agra" "Agra Cantt, Agra" "Agra Cantt, Agra" "Civil Lines, Agra" ...
## $ Longitude : num 78 0 78 78 78 ...
## $ Latitude : num 27.2 0 27.2 27.2 27.2 ...
## $ Cuisines : chr "North Indian, Mughlai" "North Indian, Mughlai" "North Indian" "Rajasthani" ...
## $ Average.Cost.for.two: int 850 700 500 400 1000 2000 2500 2500 800 3600 ...
## $ Currency : chr "Indian Rupees(Rs.)" "Indian Rupees(Rs.)" "Indian Rupees(Rs.)" "Indian Rupees(Rs.)" ...
## $ Has.Table.booking : chr "No" "No" "No" "No" ...
## $ Has.Online.delivery : chr "No" "No" "No" "No" ...
## $ Is.delivering.now : chr "No" "No" "No" "No" ...
## $ Switch.to.order.menu: chr "No" "No" "No" "No" ...
## $ Price.range : int 3 2 2 2 3 4 4 4 3 4 ...
## $ Aggregate.rating : num 3.9 3.5 3.6 4 4.2 4 4.3 4 3.6 3.8 ...
## $ Rating.color : chr "Yellow" "Yellow" "Yellow" "Green" ...
## $ Rating.text : chr "Good" "Good" "Good" "Very Good" ...
## $ Votes : int 140 71 94 87 177 45 133 41 59 46 ...
# Count the NA entries in every column (a named numeric vector, one entry per
# column).
vapply(data, function(column) sum(is.na(column)), numeric(1))
## Restaurant.ID Restaurant.Name Country.Code
## 0 0 0
## City Address Locality
## 0 0 0
## Locality.Verbose Longitude Latitude
## 0 0 0
## Cuisines Average.Cost.for.two Currency
## 0 0 0
## Has.Table.booking Has.Online.delivery Is.delivering.now
## 0 0 0
## Switch.to.order.menu Price.range Aggregate.rating
## 0 0 0
## Rating.color Rating.text Votes
## 0 0 0
2. Basic Metrics
# Overall mean rating and mean cost for two over all rows, ignoring NAs.
avg_rating <- mean(data[["Aggregate.rating"]], na.rm = TRUE)
avg_cost <- mean(data[["Average.Cost.for.two"]], na.rm = TRUE)
# Report both figures on the console.
cat("Average Rating:", avg_rating, "\n")
## Average Rating: 2.523324
cat("Average Cost for Two:", avg_cost, "\n")
## Average Cost for Two: 623.3703
3. Exploratory Data Analysis
3.1 Top 10 Cities with Most Restaurants
# Number of listed restaurants per city, largest first; keep the ten largest.
top_cities <- data %>%
  count(City) %>%
  arrange(desc(n)) %>%
  slice_head(n = 10)
print(top_cities)
## City n
## 1 New Delhi 5473
## 2 Gurgaon 1118
## 3 Noida 1080
## 4 Faridabad 251
## 5 Ghaziabad 25
## 6 Ahmedabad 21
## 7 Amritsar 21
## 8 Bhubaneshwar 21
## 9 Guwahati 21
## 10 Lucknow 21
3.2 High Rating & Low Cost Restaurants
# Restaurants rated 4.0 or higher whose average cost for two is below 300.
# NOTE(review): rows with a recorded cost of 0 also satisfy this filter (see
# the first printed row) -- confirm whether 0 stands for "unknown".
high_rating_low_cost <- filter(
  data,
  Aggregate.rating >= 4.0 & Average.Cost.for.two < 300
)
# Show the first five matches.
high_rating_low_cost %>%
  head(5) %>%
  print()
## Restaurant.ID Restaurant.Name Country.Code City
## 1 3400346 Sheroes Hangout 1 Agra
## 2 18204507 Ahuja Milk Bhandar 1 Amritsar
## 3 2200175 Gurdas Ram Jalebi Wala 1 Amritsar
## 4 2200153 Kanha Sweets 1 Amritsar
## 5 2600109 Sagar Gaire Fast Food 1 Bhopal
## Address
## 1 Opposite The Gateway Hotel, Fatehabad Road, Tajganj, Agra
## 2 Dhab Khatikan, Near Hindu College, Hathi Gate, Amritsar
## 3 Near Golden Temple, Town Hall, Amritsar
## 4 Shop 1, Opposite Bijli Pehalwan Mandir, Lawrence Road, White Avenue, Amritsar
## 5 10, Number Market, Arera Colony, Bhopal
## Locality Locality.Verbose Longitude Latitude
## 1 Tajganj Tajganj, Agra 78.04017 27.16185
## 2 Hathi Gate Hathi Gate, Amritsar 0.00000 0.00000
## 3 Town Hall Town Hall, Amritsar 0.00000 0.00000
## 4 White Avenue White Avenue, Amritsar 0.00000 0.00000
## 5 Arera Colony Arera Colony, Bhopal 77.43536 23.21429
## Cuisines Average.Cost.for.two Currency
## 1 Cafe, North Indian, Chinese 0 Indian Rupees(Rs.)
## 2 Beverages 100 Indian Rupees(Rs.)
## 3 Mithai 100 Indian Rupees(Rs.)
## 4 Fast Food 150 Indian Rupees(Rs.)
## 5 Fast Food 250 Indian Rupees(Rs.)
## Has.Table.booking Has.Online.delivery Is.delivering.now Switch.to.order.menu
## 1 No No No No
## 2 No No No No
## 3 No No No No
## 4 No No No No
## 5 No No No No
## Price.range Aggregate.rating Rating.color Rating.text Votes
## 1 1 4.9 Dark Green Excellent 77
## 2 1 4.0 Green Very Good 52
## 3 1 4.1 Green Very Good 104
## 4 1 4.1 Green Very Good 140
## 5 1 4.9 Dark Green Excellent 427
3.3 Restaurants with Online Delivery and Table Booking
# Restaurants that offer both table booking and online delivery.
online_table <- filter(
  data,
  Has.Table.booking == "Yes" & Has.Online.delivery == "Yes"
)
# Show the first five matches.
online_table %>%
  head(5) %>%
  print()
## Restaurant.ID Restaurant.Name Country.Code City
## 1 50943 Sultans of Spice 1 Bangalore
## 2 58268 The Fatty Bao - Asian Gastro Bar 1 Bangalore
## 3 58882 Big Brewsky 1 Bangalore
## 4 69024 That Madras Place 1 Chennai
## 5 72475 Haunted 1 Chennai
## Address
## 1 BluPetal Hotel, 60 Jyoti Nivas College Road, Koramangala 5th Block, Bangalore
## 2 610, 3rd Floor, 12th Main, Off 80 Feet Road, Indiranagar, Bangalore
## 3 Behind MK Retail, Before WIPRO Corporate Office, Sarjapur Road, Bangalore
## 4 34/29, 2nd Main Road, Kasturibai Nagar, Adyar, Chennai
## 5 273, F13, New Number 71, 2nd Main Road, Anna Nagar East, Chennai
## Locality Locality.Verbose Longitude
## 1 BluPetal Hotel, Koramangala BluPetal Hotel, Koramangala, Bangalore 77.61543
## 2 Indiranagar Indiranagar, Bangalore 77.64540
## 3 Sarjapur Road Sarjapur Road, Bangalore 77.68324
## 4 Adyar Adyar, Chennai 80.25074
## 5 Anna Nagar East Anna Nagar East, Chennai 80.22067
## Latitude Cuisines
## 1 12.93328 North Indian, Mughlai
## 2 12.97022 Asian
## 3 12.91304 Finger Food, North Indian, Italian, Continental, Thai, South Indian
## 4 13.00580 European, Italian, Desserts
## 5 13.08644 North Indian, Chinese, Arabian
## Average.Cost.for.two Currency Has.Table.booking Has.Online.delivery
## 1 1300 Indian Rupees(Rs.) Yes Yes
## 2 2400 Indian Rupees(Rs.) Yes Yes
## 3 1800 Indian Rupees(Rs.) Yes Yes
## 4 800 Indian Rupees(Rs.) Yes Yes
## 5 800 Indian Rupees(Rs.) Yes Yes
## Is.delivering.now Switch.to.order.menu Price.range Aggregate.rating
## 1 No No 3 4.1
## 2 No No 4 4.7
## 3 No No 3 4.5
## 4 No No 2 4.2
## 5 No No 2 3.8
## Rating.color Rating.text Votes
## 1 Green Very Good 2416
## 2 Dark Green Excellent 2369
## 3 Dark Green Excellent 5705
## 4 Green Very Good 1810
## 5 Yellow Good 519
3.4 Cuisine Analysis
# Split the comma-separated Cuisines field into one row per cuisine, then
# compute mean cost, mean rating and the number of listings for each cuisine,
# ordered from most to least frequent.
cuisine_data <- data %>%
  separate_rows(Cuisines, sep = ",\\s*") %>%
  group_by(Cuisines) %>%
  summarise(
    Avg_Cost = mean(Average.Cost.for.two, na.rm = TRUE),
    Avg_Rating = mean(Aggregate.rating, na.rm = TRUE),
    Count = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(Count))
# Show the five most frequent cuisines.
cuisine_data %>%
  head(5) %>%
  print()
## # A tibble: 5 × 4
## Cuisines Avg_Cost Avg_Rating Count
## <chr> <dbl> <dbl> <int>
## 1 North Indian 691. 2.51 3946
## 2 Chinese 699. 2.60 2690
## 3 Fast Food 437. 2.55 1963
## 4 Mughlai 728. 2.61 992
## 5 Bakery 398. 2.40 726
3.5 Most Common Cuisines
# cuisine_data is already ordered by descending Count, so its first five rows
# are the five most common cuisines.
most_common_cuisines <- head(cuisine_data, 5)
print(most_common_cuisines)
## # A tibble: 5 × 4
## Cuisines Avg_Cost Avg_Rating Count
## <chr> <dbl> <dbl> <int>
## 1 North Indian 691. 2.51 3946
## 2 Chinese 699. 2.60 2690
## 3 Fast Food 437. 2.55 1963
## 4 Mughlai 728. 2.61 992
## 5 Bakery 398. 2.40 726
3.6 Top Cities with High Rated Restaurants
# Count restaurants rated 4.0 or higher in each city and keep the three cities
# with the most of them.
top_city_high_rated <- data %>%
  filter(Aggregate.rating >= 4.0) %>%
  count(City) %>%
  arrange(desc(n)) %>%
  head(3)
print(top_city_high_rated)
## City n
## 1 New Delhi 328
## 2 Gurgaon 95
## 3 Noida 29
3.7 Most Popular Restaurants
# Five restaurants with the highest rating; ties on rating are broken by the
# larger number of votes.
top_popular <- data %>%
  select(Restaurant.Name, City, Aggregate.rating, Votes) %>%
  arrange(desc(Aggregate.rating), desc(Votes)) %>%
  slice_head(n = 5)
print(top_popular)
## Restaurant.Name City Aggregate.rating Votes
## 1 Barbeque Nation Kolkata 4.9 5966
## 2 AB's - Absolute Barbecues Hyderabad 4.9 5434
## 3 Mirchi And Mime Mumbai 4.9 3244
## 4 Naturals Ice Cream New Delhi 4.9 2620
## 5 Indian Accent - The Manor New Delhi 4.9 1934
4. Affordability Analysis
4.1 Affordable High-Rated Restaurants
# Five cheapest restaurants among those rated strictly above 4.0.
# NOTE(review): the threshold here is "> 4.0", while sections 3.2 and 3.6 use
# ">= 4.0"; rows with a recorded cost of 0 also sort to the top -- confirm both
# are intended.
affordable_high_rating <- data %>%
  filter(Aggregate.rating > 4.0) %>%
  select(Restaurant.Name, City, Average.Cost.for.two, Aggregate.rating) %>%
  arrange(Average.Cost.for.two) %>%
  slice_head(n = 5)
print(affordable_high_rating)
## Restaurant.Name City Average.Cost.for.two Aggregate.rating
## 1 Sheroes Hangout Agra 0 4.9
## 2 BMG - All Day Dining Dehradun 0 4.3
## 3 Jung Bahadur Kachori Wala New Delhi 50 4.1
## 4 Gurdas Ram Jalebi Wala Amritsar 100 4.1
## 5 Ashok Chaat Corner New Delhi 100 4.1
4.2 Most Expensive Cities
# Mean cost for two per city; keep the five cities with the highest mean.
# NOTE(review): no minimum listing count is applied, so a city with a single
# restaurant can rank first.
expensive_cities <- data %>%
  group_by(City) %>%
  summarise(
    Avg_Cost = mean(Average.Cost.for.two, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(Avg_Cost)) %>%
  slice_head(n = 5)
print(expensive_cities)
## # A tibble: 5 × 2
## City Avg_Cost
## <chr> <dbl>
## 1 Panchkula 2000
## 2 Hyderabad 1361.
## 3 Pune 1338.
## 4 Jaipur 1310
## 5 Kolkata 1272.
5. Feature Engineering
5.1 Price Category
# Bucket each restaurant by its cost for two:
#   below 300 -> "Low", 300 to 800 inclusive -> "Medium", above 800 -> "High".
# The three conditions are mutually exclusive, so their order does not matter;
# a missing cost matches none of them and yields NA.
data <- data %>%
  mutate(
    PriceCategory = case_when(
      Average.Cost.for.two > 800 ~ "High",
      between(Average.Cost.for.two, 300, 800) ~ "Medium",
      Average.Cost.for.two < 300 ~ "Low"
    )
  )
# Preview the new column next to the cost it was derived from.
data %>%
  select(Restaurant.Name, City, Average.Cost.for.two, PriceCategory) %>%
  head(10)
## Restaurant.Name City Average.Cost.for.two
## 1 Jahanpanah Agra 850
## 2 Rangrezz Restaurant Agra 700
## 3 Time2Eat - Mama Chicken Agra 500
## 4 Chokho Jeeman Marwari Jain Bhojanalya Agra 400
## 5 Pinch Of Spice Agra 1000
## 6 MoMo Cafe Agra 2000
## 7 Peshawri - ITC Mughal Agra 2500
## 8 Taj Bano - ITC Mughal Agra 2500
## 9 G Thal Agra 800
## 10 Dawat-e-Nawab - Radisson Blu Agra 3600
## PriceCategory
## 1 High
## 2 Medium
## 3 Medium
## 4 Medium
## 5 High
## 6 High
## 7 High
## 8 High
## 9 Medium
## 10 High
5.2 Popularity Tag
# Tag a restaurant "Popular" when it is rated above 4 AND has more than 500
# votes; everything else is "Regular".
data$PopularityTag <- if_else(
  data$Aggregate.rating > 4 & data$Votes > 500,
  "Popular",
  "Regular"
)
# Share of "Popular" restaurants in each city, as a percentage rounded to two
# decimals, ordered from the highest share down.
popular_percentage <- data %>%
  group_by(City) %>%
  summarise(
    Total = n(),
    Popular = sum(PopularityTag == "Popular"),
    Percentage_Popular = round(Popular / Total * 100, 2),
    .groups = "drop"
  ) %>%
  arrange(desc(Percentage_Popular))
popular_percentage %>%
  head(10) %>%
  print()
## # A tibble: 10 × 4
## City Total Popular Percentage_Popular
## <chr> <int> <int> <dbl>
## 1 Panchkula 1 1 100
## 2 Chennai 20 15 75
## 3 Bangalore 20 14 70
## 4 Kolkata 20 14 70
## 5 Hyderabad 18 11 61.1
## 6 Pune 20 11 55
## 7 Secunderabad 2 1 50
## 8 Goa 20 8 40
## 9 Mumbai 20 8 40
## 10 Ahmedabad 21 8 38.1
6. Visualizations
6.1 Distribution of Votes
# Keep only rows at or below the 99th percentile of Votes before plotting.
filtered_data <- filter(
  data,
  Votes <= quantile(Votes, probs = 0.99, na.rm = TRUE)
)
# Histogram of vote counts (50 bins).
ggplot(filtered_data, aes(x = Votes)) +
  geom_histogram(bins = 50, fill = "steelblue", colour = "black") +
  ggtitle("Distribution of Votes Across Restaurants") +
  xlab("Number of Votes") +
  ylab("Frequency") +
  theme_minimal()

6.2 Distribution of Cost
# Keep only rows at or below the 99th percentile of cost before plotting.
filtered_cost_data <- filter(
  data,
  Average.Cost.for.two <= quantile(Average.Cost.for.two, probs = 0.99, na.rm = TRUE)
)
# Histogram of the average cost for two (50 bins).
ggplot(filtered_cost_data, aes(x = Average.Cost.for.two)) +
  geom_histogram(bins = 50, fill = "darkorange", colour = "black") +
  ggtitle("Distribution of Average Cost for Two") +
  xlab("Average Cost for Two (INR)") +
  ylab("Frequency") +
  theme_minimal()

7. Modeling
7.1 Simple Linear Regression: Cost as a Function of Votes
# Ordinary least-squares fit with Average.Cost.for.two as the response and
# Votes as the only predictor, i.e. cost is modelled as a function of votes.
simple_model <- lm(Average.Cost.for.two ~ Votes, data = data)
# Print the coefficient table, residual quantiles and R-squared of the fit.
summary(simple_model)
##
## Call:
## lm(formula = Average.Cost.for.two ~ Votes, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3560.1 -283.7 -139.0 78.9 7373.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 569.56724 6.45293 88.27 <2e-16 ***
## Votes 0.39211 0.01436 27.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 571.6 on 8650 degrees of freedom
## Multiple R-squared: 0.07939, Adjusted R-squared: 0.07929
## F-statistic: 746 on 1 and 8650 DF, p-value: < 2.2e-16
# Scatter of cost against votes, overlaid with the least-squares line and its
# confidence band.
ggplot(data, aes(x = Votes, y = Average.Cost.for.two)) +
  geom_point(colour = "gray40", alpha = 0.4) +
  geom_smooth(method = "lm", colour = "blue", se = TRUE) +
  ggtitle("Votes vs Average Cost for Two") +
  xlab("Votes") +
  ylab("Average Cost for Two")
## `geom_smooth()` using formula = 'y ~ x'

7.2 Multiple Linear Regression: Rating
# 0/1 indicator: 1 when the restaurant offers online delivery, otherwise 0.
data$OnlineDelivery <- as.numeric(data$Has.Online.delivery == "Yes")
# City groupings used to derive the tier predictor.
tier1_cities <- c("Delhi", "New Delhi", "Mumbai", "Bangalore", "Bengaluru")
tier2_cities <- c("Hyderabad", "Chennai", "Kolkata", "Pune")
# Tier 1 for tier-1 cities, 2 for tier-2 cities, 3 for every other city.
# NOTE(review): CityTier is stored as a number, so the model below fits one
# linear slope across tiers 1-2-3 rather than a separate effect per tier --
# confirm this is intended.
data$CityTier <- ifelse(
  data$City %in% tier1_cities,
  1,
  ifelse(data$City %in% tier2_cities, 2, 3)
)
# Linear model of the rating on votes, the delivery indicator and city tier.
multi_model_final <- lm(Aggregate.rating ~ Votes + OnlineDelivery + CityTier, data = data)
summary(multi_model_final)
##
## Call:
## lm(formula = Aggregate.rating ~ Votes + OnlineDelivery + CityTier,
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3257 -1.6020 0.5276 1.0414 2.6337
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.013e+00 3.221e-02 62.493 < 2e-16 ***
## Votes 9.178e-04 3.506e-05 26.178 < 2e-16 ***
## OnlineDelivery 8.992e-01 3.343e-02 26.900 < 2e-16 ***
## CityTier 7.733e-02 1.564e-02 4.944 7.81e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.388 on 8648 degrees of freedom
## Multiple R-squared: 0.1563, Adjusted R-squared: 0.156
## F-statistic: 533.9 on 3 and 8648 DF, p-value: < 2.2e-16
# Store the model's prediction for every row alongside the observed rating.
data[["Predicted_Rating_Final"]] <- predict(multi_model_final, newdata = data)
# Actual vs predicted ratings; the dashed y = x line marks perfect prediction.
ggplot(data, aes(x = Aggregate.rating, y = Predicted_Rating_Final)) +
  geom_point(alpha = 0.6, colour = "darkorange") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", colour = "blue") +
  ggtitle("Actual vs Predicted Restaurant Ratings") +
  xlab("Actual Aggregate Rating") +
  ylab("Predicted Aggregate Rating") +
  theme_minimal()

7.3 Polynomial Regression: Votes as a Function of Cost
# Quadratic least-squares fit with Votes as the response and the cost for two
# as the predictor. raw = TRUE uses the plain powers (cost, cost^2) instead of
# orthogonal polynomials, so the two slope coefficients apply directly to cost
# and cost squared.
poly_cost_votes_model <- lm(Votes ~ poly(Average.Cost.for.two, 2, raw = TRUE), data = data)
# Print the coefficient table, residual quantiles and R-squared of the fit.
summary(poly_cost_votes_model)
##
## Call:
## lm(formula = Votes ~ poly(Average.Cost.for.two, 2, raw = TRUE),
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -602.9 -112.6 -40.9 14.6 10420.0
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -8.472e+01 8.286e+00 -10.22
## poly(Average.Cost.for.two, 2, raw = TRUE)1 4.397e-01 1.525e-02 28.83
## poly(Average.Cost.for.two, 2, raw = TRUE)2 -7.017e-05 3.964e-06 -17.70
## Pr(>|t|)
## (Intercept) <2e-16 ***
## poly(Average.Cost.for.two, 2, raw = TRUE)1 <2e-16 ***
## poly(Average.Cost.for.two, 2, raw = TRUE)2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 403.5 on 8649 degrees of freedom
## Multiple R-squared: 0.1116, Adjusted R-squared: 0.1114
## F-statistic: 543.2 on 2 and 8649 DF, p-value: < 2.2e-16
# Store the quadratic model's predicted vote count for every row.
data[["Predicted_Votes_Cost"]] <- predict(poly_cost_votes_model, newdata = data)
# Observed votes against cost, with the fitted quadratic curve drawn on top.
ggplot(data, aes(x = Average.Cost.for.two, y = Votes)) +
  geom_point(alpha = 0.5, colour = "blue") +
  geom_line(aes(y = Predicted_Votes_Cost), linewidth = 1, colour = "darkblue") +
  ggtitle("Polynomial Regression: Cost vs Votes") +
  xlab("Average Cost for Two") +
  ylab("Votes") +
  theme_minimal()

8. Statistical Tests
8.1 ANOVA: Online Delivery vs Rating
# Use the delivery flag as a categorical grouping variable.
data[["Has.Online.delivery"]] <- as.factor(data[["Has.Online.delivery"]])
# One-way ANOVA: does the mean rating differ between the delivery groups?
anova_result <- aov(Aggregate.rating ~ Has.Online.delivery, data = data)
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## Has.Online.delivery 1 1723 1723.0 826.7 <2e-16 ***
## Residuals 8650 18028 2.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Rating distribution for each delivery group.
boxplot(
  Aggregate.rating ~ Has.Online.delivery,
  data = data,
  col = c("tomato", "skyblue")
)

8.2 ANOVA: Table Booking vs Votes
# Use the table-booking flag as a categorical grouping variable.
data[["Has.Table.booking"]] <- as.factor(data[["Has.Table.booking"]])
# One-way ANOVA: does the mean vote count differ between the booking groups?
anova_votes <- aov(Votes ~ Has.Table.booking, data = data)
summary(anova_votes)
## Df Sum Sq Mean Sq F value Pr(>F)
## Has.Table.booking 1 5.606e+07 56056876 317.1 <2e-16 ***
## Residuals 8650 1.529e+09 176758
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Vote distribution for each booking group.
boxplot(
  Votes ~ Has.Table.booking,
  data = data,
  col = c("lightcoral", "lightblue")
)

8.3 ANOVA: Average Cost across Cities
# Keep the three cities being compared and drop rows without a cost value.
selected_data <- data[
  data$City %in% c("Mumbai", "New Delhi", "Bangalore") &
    !is.na(data$Average.Cost.for.two),
]
# Convert City to a factor holding only the cities that remain.
selected_data[["City"]] <- factor(selected_data[["City"]])
# One-way ANOVA: does the mean cost for two differ between these cities?
anova_city <- aov(Average.Cost.for.two ~ City, data = selected_data)
summary(anova_city)
## Df Sum Sq Mean Sq F value Pr(>F)
## City 2 1.255e+07 6274959 15.61 1.74e-07 ***
## Residuals 5510 2.215e+09 402030
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Cost distribution for each of the selected cities.
boxplot(
  Average.Cost.for.two ~ City,
  data = selected_data,
  col = c("lightblue", "lightpink", "lightgreen")
)

8.4 ANOVA: Cuisine Type vs Votes
# Primary cuisine = first entry of the comma-separated Cuisines string, with
# surrounding whitespace removed. vapply() guarantees a character vector (one
# value per row) even for empty input, where sapply() would return a list.
data$Primary.Cuisine <- trimws(vapply(
  strsplit(as.character(data$Cuisines), ","),
  function(parts) parts[1],
  character(1)
))
# The three most frequent primary cuisines (ties at the cut-off are kept).
# slice_max() replaces the superseded top_n().
top3_cuisines <- data %>%
  count(Primary.Cuisine, sort = TRUE) %>%
  slice_max(order_by = n, n = 3) %>%
  pull(Primary.Cuisine)
# Restrict the data to restaurants whose primary cuisine is one of those.
filtered_data <- data %>% filter(Primary.Cuisine %in% top3_cuisines)
# One-way ANOVA: does the mean vote count differ between these cuisines?
anova_top3 <- aov(Votes ~ Primary.Cuisine, data = filtered_data)
summary(anova_top3)
## Df Sum Sq Mean Sq F value Pr(>F)
## Primary.Cuisine 2 1992269 996134 9.859 5.34e-05 ***
## Residuals 4474 452042276 101038
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Vote distribution per cuisine, labelled like the other ggplot figures.
ggplot(filtered_data, aes(x = Primary.Cuisine, y = Votes, fill = Primary.Cuisine)) +
  geom_boxplot() +
  labs(
    title = "Votes by Primary Cuisine (Top 3 Cuisines)",
    x = "Primary Cuisine",
    y = "Votes",
    fill = "Primary Cuisine"
  ) +
  theme_minimal()

9. Correlation Analysis
9.1 Rating vs Cost
# Pearson correlation between rating and cost for two, computed on the rows
# where both values are present.
cor(data$Aggregate.rating, data$Average.Cost.for.two, use = "complete.obs")
## [1] 0.3441716
# Scatter with the least-squares line; title, axis labels and theme added so
# the figure matches the labelled plots in the earlier sections.
ggplot(data, aes(x = Aggregate.rating, y = Average.Cost.for.two)) +
  geom_point(color = "blue", alpha = 0.5) +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  labs(
    title = "Aggregate Rating vs Average Cost for Two",
    x = "Aggregate Rating",
    y = "Average Cost for Two"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

9.2 Rating vs Votes
# Significance test for the Pearson correlation between rating and votes.
cor_test_result <- cor.test(data$Aggregate.rating, data$Votes)
# The correlation coefficient itself, computed on complete pairs.
correlation_value <- with(
  data,
  cor(Aggregate.rating, Votes, use = "complete.obs")
)
print(correlation_value)
## [1] 0.2876924
print(cor_test_result)
##
## Pearson's product-moment correlation
##
## data: data$Aggregate.rating and data$Votes
## t = 27.938, df = 8650, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2682468 0.3069037
## sample estimates:
## cor
## 0.2876924
# Scatter of votes against rating with the least-squares line; title, axis
# labels and theme added so the figure matches the labelled plots in the
# earlier sections.
ggplot(data, aes(x = Aggregate.rating, y = Votes)) +
  geom_point(color = "darkgreen", alpha = 0.5) +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  labs(
    title = "Aggregate Rating vs Votes",
    x = "Aggregate Rating",
    y = "Votes"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

9.3 Online Delivery vs Votes
# 0/1 indicator: 1 when the restaurant offers online delivery, otherwise 0.
data$Online_Delivery_Numeric <- ifelse(data$Has.Online.delivery == "Yes", 1, 0)
# Pearson correlation between the indicator and the vote count.
correlation_value <- cor(data$Online_Delivery_Numeric, data$Votes, use = "complete.obs")
print(correlation_value)
## [1] 0.1047307
# Jittered points per group with the least-squares line. The x axis only takes
# the values 0 and 1, so ticks are placed there and labelled No/Yes instead of
# showing fractional positions.
ggplot(data, aes(x = Online_Delivery_Numeric, y = Votes)) +
  geom_jitter(width = 0.2, height = 0, alpha = 0.5, color = "blue") +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  scale_x_continuous(breaks = c(0, 1), labels = c("No", "Yes")) +
  labs(
    title = "Online Delivery vs Votes",
    x = "Has Online Delivery",
    y = "Votes"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

## Conclusion
## In this project, we explored the Zomato restaurant data and found several interesting patterns. Most of the listed restaurants are in New Delhi, and North Indian and Chinese are the most common cuisines. We also saw that many low-cost restaurants have very good ratings. Features such as online delivery and table booking are associated with statistically significant differences in customer ratings and votes.
## Our analysis showed weak positive relationships between restaurant cost, votes, and ratings (correlations of roughly 0.3). Overall, this study helps us understand what customers like and how restaurants perform in different cities.