{
"id": "16774318",
"name": "Otto Enoteca & Pizzeria",
"url": "https://www.zomato.com/new-york-city/otto-enoteca-pizzeria-greenwich-village",
"location": {
"address": "1 5th Avenue, New York, NY 10003",
"locality": "Greenwich Village",
"city": "New York City",
"latitude": "40.732013",
"longitude": "-73.996155",
"zipcode": "10003",
"country_id": "216"
},
"average_cost_for_two": "60",
"price_range": "2",
"currency": "$",
"thumb": "https://b.zmtcdn.com/data/pictures/chains/8/16774318/a54deb9e4dbb79dd7c8091b30c642077_featured_thumb.png",
"featured_image": "https://d.zmtcdn.com/data/pictures/chains/8/16774318/a54deb9e4dbb79dd7c8091b30c642077_featured_v2.png",
"photos_url": "https://www.zomato.com/new-york-city/otto-enoteca-pizzeria-greenwich-village/photos#tabtop",
"menu_url": "https://www.zomato.com/new-york-city/otto-enoteca-pizzeria-greenwich-village/menu#tabtop",
"events_url": "https://www.zomato.com/new-york-city/otto-enoteca-pizzeria-greenwich-village/events#tabtop",
"user_rating": {
"aggregate_rating": "3.7",
"rating_text": "Very Good",
"rating_color": "5BA829",
"votes": "1046"
},
"has_online_delivery": "0",
"is_delivering_now": "0",
"has_table_booking": "0",
"deeplink": "zomato://r/16774318",
"cuisines": "Cafe",
"all_reviews_count": "15",
"photo_count": "18",
"phone_numbers": "(212) 228-2930",
I have used Mongodb’s cloud database platform ATLAS to store the details of various restaurants.Each restaurant is a separae document in the database.
from pymongo import MongoClient
import requests
import random
import configparser
import csv
from google.cloud import storage;
import os
import json
from bson import json_util
from bson.json_util import dumps
from google.cloud import bigquery
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'zomatoproject-b09246d74291.json'
parser = configparser.ConfigParser()
parser.read("pipeline.conf")
hostname = parser.get("mongo_config", "hostname")
username = parser.get("mongo_config", "username")
password = parser.get("mongo_config", "password")
database_name = parser.get("mongo_config", "database")
collection_name = parser.get("mongo_config", "collection")
mongo_client = MongoClient( "mongodb+srv://" + username + ":" + password + "@" + hostname + "/" +database_name+ "?authSource=admin&replicaSet=atlas-9b62sm-shard-0&readPreference=primary&appname=MongoDB%20Compass&ssl=true" )
mongo_db = mongo_client['ZOMATO']
mongo_collection = mongo_db[collection_name]
mongo_query = {"price_range" : { "$gte": 0 } }
restaurants = mongo_collection.find(mongo_query, batch_size=300)
Google Cloud Storage is a RESTful online file storage web service for storing and accessing data on Google Cloud Platform infrastructure.
def json_to_gcp():
storage_client = storage.Client()
bucket = storage_client.bucket('restaurantdata0694')
blob = bucket.blob('data.json')
data=json.loads(blob.download_as_string())
all_restaurants=[]
for restaurant in data:
id=str(restaurant.get("id"))
price_range=str(restaurant.get("price_range"))
average_cost_for2=str(restaurant.get("average_cost_for_two"))
city=str(restaurant.get("location",{}).get("city"))
user_rating=str(restaurant.get('user_rating',{}).get('aggregate_rating'))
latitude= str(restaurant.get("location",{}).get('latitude'))
longitude=str(restaurant.get("location",{}).get('longitude'))
online_delivery=str(restaurant.get("has_online_delivery"))
cuisine=str((restaurant.get("cuisines")).split(",")[0])
if(len(restaurant.get("establishment"))>=1):
establishment=str(restaurant.get('establishment')[0])
else:
establishment='0'
hotel=[]
hotel.append((id))
hotel.append(price_range)
hotel.append(average_cost_for2)
hotel.append(city)
hotel.append(user_rating)
hotel.append(latitude)
hotel.append(longitude)
hotel.append(online_delivery)
hotel.append(cuisine)
hotel.append(establishment)
all_restaurants.append(hotel)
export_file= "Restaurants_info.csv"
with open(export_file, 'w',newline='') as restaurant_file:
csv_write = csv.writer(restaurant_file, delimiter=',')
csv_write.writerows(all_restaurants)
restaurant_file.close()
storage_client = storage.Client()
bucket = storage_client.bucket('restaurantdata0694')
blob = bucket.blob('restaurantdata0694.csv')
blob.upload_from_filename('Restaurants_info.csv')
Google Cloud Functions is a serverless execution environment for building and connecting cloud services. With Cloud Functions you write simple, single-purpose functions that are attached to events emitted from your cloud infrastructure and services.
def warehouse_load():
client = bigquery.Client()
job_config = bigquery.LoadJobConfig(
schema=[
bigquery.SchemaField("id", "INTEGER","NULLABLE"),
bigquery.SchemaField("price_range", "INTEGER","NULLABLE"),
bigquery.SchemaField("average_cost_for2", "INTEGER","NULLABLE"),
bigquery.SchemaField("city", "STRING","NULLABLE"),
bigquery.SchemaField("user_rating", "FLOAT","NULLABLE"),
bigquery.SchemaField("latitude", "FLOAT","NULLABLE"),
bigquery.SchemaField("longitude", "FLOAT","NULLABLE"),
bigquery.SchemaField("online_delivery", "INTEGER","NULLABLE"),
bigquery.SchemaField("cuisine", "STRING","NULLABLE"),
bigquery.SchemaField("establishment", "STRING","NULLABLE"),
],
skip_leading_rows=1,
source_format=bigquery.SourceFormat.CSV,
)
uri = "https://storage.cloud.google.com/restaurantdata0694/restaurantdata0694.csv"
table_id= "zomatoproject.restaurant_data.restaurant_data1"
load_job = client.load_table_from_uri(
uri, table_id, job_config=job_config
)
load_job.result() # Waits for the job to complete.
destination_table = client.get_table(table_id)
print("Loaded {} rows.".format(destination_table.num_rows))
warehouse_load()
BigQuery is a fully-managed, serverless data warehouse that enables scalable analysis over petabytes of data.