Packages used: png, ggplot2, ggmap, directlabels, XML, dplyr, httr.
Then create a data frame of rank, restaurant name, and number of reviews.
# Scrape the number of reviews: text of every <span class="reviewCount"> in `html`
No_of_Reviews <- xpathSApply(html, "//span[@class='reviewCount']", xmlValue)
head(No_of_Reviews) # first 6 results
## [1] "\n\n68 reviews \n" "\n\n259 reviews \n" "\n\n194 reviews \n"
## [4] "\n\n105 reviews \n" "\n\n358 reviews \n" "\n\n107 reviews \n"
# Scrape the restaurant rank: text of every <div class="popIndexBlock"> in `html`
Rank <- xpathSApply(html, "//div[@class='popIndexBlock']", xmlValue)
head(Rank) # first 6 results
## [1] "\n\n#1 of 468 Restaurants in Worcester\n\n"
## [2] "\n\n#2 of 468 Restaurants in Worcester\n\n"
## [3] "\n\n#3 of 468 Restaurants in Worcester\n\n"
## [4] "\n\n#4 of 468 Restaurants in Worcester\n\n"
## [5] "\n\n#5 of 468 Restaurants in Worcester\n\n"
## [6] "\n\n#6 of 468 Restaurants in Worcester\n\n"
# Combine the three scraped vectors into one data frame, one row per restaurant
worcrest <- data.frame(
  Rank = Rank,
  Restaurant = Restaurant,
  No_of_Reviews = No_of_Reviews
)
head(worcrest) # first 6 rows of the data frame
## Rank
## 1 \n\n#1 of 468 Restaurants in Worcester\n\n
## 2 \n\n#2 of 468 Restaurants in Worcester\n\n
## 3 \n\n#3 of 468 Restaurants in Worcester\n\n
## 4 \n\n#4 of 468 Restaurants in Worcester\n\n
## 5 \n\n#5 of 468 Restaurants in Worcester\n\n
## 6 \n\n#6 of 468 Restaurants in Worcester\n\n
## Restaurant No_of_Reviews
## 1 \n\nMare e Monti\n\n \n\n68 reviews \n
## 2 \n\n111 Chop House\n\n \n\n259 reviews \n
## 3 \n\nBocado\n\n \n\n194 reviews \n
## 4 \n\nBaba Sushi\n\n \n\n105 reviews \n
## 5 \n\nO'Connor's Restaurant & Bar\n\n \n\n358 reviews \n
## 6 \n\nThe People's Kitchen & Citizen Wine Bar\n\n \n\n107 reviews \n
# Clean the scraped columns.
# Rank: rows are numbered in order, replacing the scraped rank text; numbering
#   follows the number of rows instead of a hard-coded 30 so a page with a
#   different number of results does not fail.
# Restaurant: drop newlines and trailing whitespace.
# No_of_Reviews: drop newlines, the word "reviews" and all whitespace, leaving
#   the count as a character string.
worcrest$Rank <- seq_len(nrow(worcrest))
worcrest <- mutate(
  worcrest,
  Restaurant = gsub("\n|\\s+$", "", as.character(Restaurant)),
  No_of_Reviews = gsub("\n|reviews|\\s", "", as.character(No_of_Reviews))
)
# Cleaned data
head(worcrest)
## Rank Restaurant No_of_Reviews
## 1 1 Mare e Monti 68
## 2 2 111 Chop House 259
## 3 3 Bocado 194
## 4 4 Baba Sushi 105
## 5 5 O'Connor's Restaurant & Bar 358
## 6 6 The People's Kitchen & Citizen Wine Bar 107
# Search string for each restaurant: "<name>, Worcester, MA, USA".
# paste0() is vectorised, so this yields one entry per restaurant without an
# index loop or a hard-coded row count.
Location <- paste0(worcrest$Restaurant, ", Worcester, MA, USA")
# Look up an address for every entry of `Location` through a Google search and
# keep the first value of column V2 of the first HTML table in the response.
# Entries for which no table (or no V2 column) comes back are left as NA.
address <- rep(NA_character_, length(Location))
for (i in seq_along(Location)) {
  # Percent-encode the query: a raw '&' or apostrophe in a restaurant name
  # would otherwise cut the search term short.
  search_url <- paste0(
    "http://www.google.co.in/search?q=",
    URLencode(Location[i], reserved = TRUE)
  )
  temphtml <- GET(search_url)
  tempcont <- content(temphtml, as = "text")
  temptable <- readHTMLTable(tempcont)
  if (length(temptable) > 0) {
    # as.character(NULL)[1] is NA, so a missing V2 column also yields NA
    address[i] <- as.character(temptable[[1]]$V2)[1] # Extract Address from table
  }
}
address
## [1] "19 Wall St, Worcester, MA 01604, United States+1 508-767-1800"
## [2] "111 Shrewsbury St, Worcester, MA 01604, United States+1 508-799-4111"
## [3] "82 Winter St, Worcester, MA 01604, United States+1 508-797-1011"
## [4] "309 Park Ave, Worcester, MA 01609, United States+1 508-752-8822"
## [5] "1878 Andorra St, Navarre, FL 32566, United States+1 850-939-8400"
## [6] "1 Exchange St, Worcester, MA 01608, United States+1 508-459-9090"
## [7] "529 Main St, Worcester, MA 01608, United States+1 508-799-7190"
## [8] "118 Highland St, Worcester, MA 01609, United States+1 508-798-3474"
## [9] "102 Grove St, Worcester, MA 01605, United States+1 508-756-5014"
## [10] "300 Southbridge St, Worcester, MA 01608, United States+1 508-753-5600"
## [11] "1074 W Boylston St, Worcester, MA 01606, United States+1 508-852-6888"
## [12] "158 Southbridge St, Worcester, MA 01608, United States+1 508-753-4362"
## [13] "119 Shrewsbury St, Worcester, MA 01604, United States+1 508-755-7333"
## [14] "139 Green St, Worcester, MA 01604, United States+1 508-363-1111"
## [15] "117 Highland St, Worcester, MA 01609, United States+1 508-756-8458"
## [16] "144 Main St, Worcester, MA 01608, United States+1 508-795-1012"
## [17] "92 Shrewsbury St, Worcester, MA 01604, United States+1 508-796-5915"
## [18] "1"
## [19] "1394 Main St, Worcester, MA 01603, United States+1 508-926-8861"
## [20] "538 Pleasant St, Worcester, MA 01602, United States+1 508-756-2660"
## [21] "609 W Boylston St, Worcester, MA 01606, United States+1 508-755-4451"
## [22] "183 Shrewsbury St # 4, Worcester, MA 01604, United States+1 508-753-9912"
## [23] "593 Park Ave, Worcester, MA 01603, United States+1 508-756-7555"
## [24] "Seymour Street, Ballymoney, County Antrim BT53 6JR, United Kingdom+44 28 2766 3095"
## [25] "156 Shrewsbury St, Worcester, MA 01604, United States+1 508-926-8115"
## [26] "296 Hamilton St, Worcester, MA 01604, United States+1 508-797-5550"
## [27] "1"
## [28] "278 Shrewsbury St, Worcester, MA 01604, United States+1 508-757-1450"
## [29] "104 Shrewsbury St, Worcester, MA 01604, United States+1 508-752-3862"
## [30] "99 LocationsConcord, NH · Dover, NH · Hooksett, NH · Littleton, NH ..."
# Overwrite entries 5, 18, 27, 24 and 30 of `address` with hand-entered values
address[c(5, 18, 27, 24, 30)] <- c(
  "1160 W Boylston St, Worcester, MA 01606-1147 United States",
  "394 Belmont St, Worcester, MA 01604, United States",
  "712 A Pleasant Street, Worcester, MA 01602, United States",
  "344 Chandler Street, Ste 1, Worcester, MA 01602-3457, United States",
  "11 East Central Street, Worcester, MA, United States"
)
# Drop whatever follows the last "United States" in each address (the scraped
# values carry the phone number, e.g. "...United States+1 508-767-1800").
# A single vectorised sub() replaces the earlier approach of using the leftover
# text as a regular expression, which depended on how the regex engine treats a
# leading '+'. Addresses that do not contain "United States" are left unchanged.
address <- sub("^(.*United States).*$", "\\1", address)
address
## [1] "19 Wall St, Worcester, MA 01604, United States"
## [2] "111 Shrewsbury St, Worcester, MA 01604, United States"
## [3] "82 Winter St, Worcester, MA 01604, United States"
## [4] "309 Park Ave, Worcester, MA 01609, United States"
## [5] "1160 W Boylston St, Worcester, MA 01606-1147 United States"
## [6] "1 Exchange St, Worcester, MA 01608, United States"
## [7] "529 Main St, Worcester, MA 01608, United States"
## [8] "118 Highland St, Worcester, MA 01609, United States"
## [9] "102 Grove St, Worcester, MA 01605, United States"
## [10] "300 Southbridge St, Worcester, MA 01608, United States"
## [11] "1074 W Boylston St, Worcester, MA 01606, United States"
## [12] "158 Southbridge St, Worcester, MA 01608, United States"
## [13] "119 Shrewsbury St, Worcester, MA 01604, United States"
## [14] "139 Green St, Worcester, MA 01604, United States"
## [15] "117 Highland St, Worcester, MA 01609, United States"
## [16] "144 Main St, Worcester, MA 01608, United States"
## [17] "92 Shrewsbury St, Worcester, MA 01604, United States"
## [18] "394 Belmont St, Worcester, MA 01604, United States"
## [19] "1394 Main St, Worcester, MA 01603, United States"
## [20] "538 Pleasant St, Worcester, MA 01602, United States"
## [21] "609 W Boylston St, Worcester, MA 01606, United States"
## [22] "183 Shrewsbury St # 4, Worcester, MA 01604, United States"
## [23] "593 Park Ave, Worcester, MA 01603, United States"
## [24] "344 Chandler Street, Ste 1, Worcester, MA 01602-3457, United States"
## [25] "156 Shrewsbury St, Worcester, MA 01604, United States"
## [26] "296 Hamilton St, Worcester, MA 01604, United States"
## [27] "712 A Pleasant Street, Worcester, MA 01602, United States"
## [28] "278 Shrewsbury St, Worcester, MA 01604, United States"
## [29] "104 Shrewsbury St, Worcester, MA 01604, United States"
## [30] "11 East Central Street, Worcester, MA, United States"
# Add the addresses to the restaurant data frame as column `Address`
worcrest[["Address"]] <- address
head(worcrest) # the data frame created
## Rank Restaurant No_of_Reviews
## 1 1 Mare e Monti 68
## 2 2 111 Chop House 259
## 3 3 Bocado 194
## 4 4 Baba Sushi 105
## 5 5 O'Connor's Restaurant & Bar 358
## 6 6 The People's Kitchen & Citizen Wine Bar 107
## Address
## 1 19 Wall St, Worcester, MA 01604, United States
## 2 111 Shrewsbury St, Worcester, MA 01604, United States
## 3 82 Winter St, Worcester, MA 01604, United States
## 4 309 Park Ave, Worcester, MA 01609, United States
## 5 1160 W Boylston St, Worcester, MA 01606-1147 United States
## 6 1 Exchange St, Worcester, MA 01608, United States
# Geocode every restaurant by scraping the Google Maps page for
# "<name>, <address>". If the result is missing or falls outside the
# Worcester bounding box (lat 42..43, lon -72..-71), retry with the address
# alone. Results are stored in worcrest$Latitude / worcrest$Longitude.

# Fetch the Google Maps page for `query` and return c(lon = , lat = ) parsed
# from its first <script> element; both values are NA when nothing can be read.
fetch_map_coords <- function(query) {
  # '&' is spelled out as "and" (as before); the rest of the query is
  # percent-encoded so that characters such as '#' ("St # 4") or an apostrophe
  # cannot truncate the URL.
  query_url <- paste0(
    "http://maps.google.com/?q=",
    URLencode(gsub("&", "and", query, fixed = TRUE), reserved = TRUE)
  )
  page_text <- content(GET(query_url), as = "text")
  page_doc <- htmlTreeParse(page_text, useInternalNodes = TRUE)
  scripts <- xpathSApply(xmlRoot(page_doc), "//script", xmlValue)
  if (length(scripts) == 0L) {
    return(c(lon = NA_real_, lat = NA_real_))
  }
  # NOTE(review): the coordinates are read from the 51st space-separated token
  # of the first script, split on ',' or ']' -- this depends on the exact page
  # layout and should be confirmed against a current response.
  token <- strsplit(scripts[[1]], " ")[[1]][51]
  fields <- strsplit(token, ",|]")[[1]]
  c(lon = as.numeric(fields[2]), lat = as.numeric(fields[3]))
}

# TRUE when both coordinates are present and inside the Worcester bounding box.
in_worcester_box <- function(coords) {
  !anyNA(coords) &&
    coords[["lat"]] >= 42 && coords[["lat"]] <= 43 &&
    coords[["lon"]] >= -72 && coords[["lon"]] <= -71
}

n_restaurants <- nrow(worcrest)
longitude <- numeric(n_restaurants)
latitude <- numeric(n_restaurants)
for (i in seq_len(n_restaurants)) {
  coords <- fetch_map_coords(
    paste0(worcrest$Restaurant[i], ", ", worcrest$Address[i])
  )
  if (!in_worcester_box(coords)) {
    # The name + address query missed; fall back to the address only
    coords <- fetch_map_coords(worcrest$Address[i])
  }
  latitude[i] <- coords[["lat"]]
  longitude[i] <- coords[["lon"]]
}
worcrest$Latitude <- latitude
worcrest$Longitude <- longitude
head(worcrest)
## Rank Restaurant No_of_Reviews
## 1 1 Mare e Monti 68
## 2 2 111 Chop House 259
## 3 3 Bocado 194
## 4 4 Baba Sushi 105
## 5 5 O'Connor's Restaurant & Bar 358
## 6 6 The People's Kitchen & Citizen Wine Bar 107
## Address Latitude
## 1 19 Wall St, Worcester, MA 01604, United States 42.25761
## 2 111 Shrewsbury St, Worcester, MA 01604, United States 42.26353
## 3 82 Winter St, Worcester, MA 01604, United States 42.25882
## 4 309 Park Ave, Worcester, MA 01609, United States 42.26196
## 5 1160 W Boylston St, Worcester, MA 01606-1147 United States 42.33574
## 6 1 Exchange St, Worcester, MA 01608, United States 42.26566
## Longitude
## 1 -71.78645
## 2 -71.78955
## 3 -71.79422
## 4 -71.82014
## 5 -71.78840
## 6 -71.79956
# Fetch the base map for Worcester at zoom level 12
map <- get_map("Worcester, MA, USA", zoom = 12)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Worcester,+MA,+USA&zoom=12&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Worcester,+MA,+USA&sensor=false
# Plot data: one row per restaurant with its coordinates, name, rank and the
# legend entry "<rank>, <name>".
df <- data.frame(
  lat = worcrest$Latitude,
  lon = worcrest$Longitude,
  labels = worcrest$Restaurant
)
df$rank <- seq_len(nrow(df))
legend_text <- paste0(df$rank, ", ", df$labels)
# Fix the factor levels in rank order so the legend reads 1, 2, 3, ... rather
# than the lexical order 1, 10, 11, ...
df$legend <- factor(legend_text, levels = unique(legend_text))

# Point size grows with the log of the review count (set, not mapped)
point_size <- log(as.numeric(worcrest$No_of_Reviews)) * 2

# Map of the restaurants: triangles sized by review count and filled per
# restaurant, each labelled with its rank.
asd <- ggmap(map) +
  geom_point(
    data = df,
    # bare column names: aes() is evaluated inside `data`, so no df$ prefix
    aes(x = lon, y = lat, fill = legend),
    alpha = .5, pch = 24, size = point_size
  ) +
  labs(x = "Longitude", y = "Latitude") +
  geom_dl(
    data = df,
    aes(label = rank),
    # named explicitly so the list is not swallowed by `...`
    method = list(
      dl.trans(y = y + 0.2), "top.bumptwice",
      cex = .7, alpha = 1, fontface = "bold", family = "Helvetica"
    )
  ) +
  guides(fill = guide_legend(ncol = 2, byrow = TRUE)) +
  ggtitle("Top Restaurants in Worcester, US") +
  scale_fill_discrete(name = "Size: No. of Reviews, Rank, Rest. Name")
## Loading required package: proto
| Rank | Restaurant | No_of_Reviews | Address | Latitude | Longitude |
|---|---|---|---|---|---|
| 1 | Mare e Monti | 68 | 19 Wall St, Worcester, MA 01604, United States | 42.26 | -71.79 |
| 2 | 111 Chop House | 259 | 111 Shrewsbury St, Worcester, MA 01604, United States | 42.26 | -71.79 |
| 3 | Bocado | 194 | 82 Winter St, Worcester, MA 01604, United States | 42.26 | -71.79 |
| 4 | Baba Sushi | 105 | 309 Park Ave, Worcester, MA 01609, United States | 42.26 | -71.82 |
| 5 | O’Connor’s Restaurant & Bar | 358 | 1160 W Boylston St, Worcester, MA 01606-1147 United States | 42.34 | -71.79 |
| 6 | The People’s Kitchen & Citizen Wine Bar | 107 | 1 Exchange St, Worcester, MA 01608, United States | 42.27 | -71.80 |
| 7 | Theatre Cafe | 25 | 529 Main St, Worcester, MA 01608, United States | 42.26 | -71.80 |
| 8 | Sole Proprietor | 476 | 118 Highland St, Worcester, MA 01609, United States | 42.27 | -71.81 |
| 9 | Lucky’s Cafe | 50 | 102 Grove St, Worcester, MA 01605, United States | 42.28 | -71.80 |
| 10 | Miss Worcester Diner | 54 | 300 Southbridge St, Worcester, MA 01608, United States | 42.25 | -71.81 |
| 11 | Lou Roc’s Diner | 60 | 1074 W Boylston St, Worcester, MA 01606, United States | 42.33 | -71.79 |
| 12 | Coney Island Lunch | 108 | 158 Southbridge St, Worcester, MA 01608, United States | 42.26 | -71.80 |
| 13 | Pomir Grill | 61 | 119 Shrewsbury St, Worcester, MA 01604, United States | 42.26 | -71.79 |
| 14 | Smokestack Urban Barbecue | 119 | 139 Green St, Worcester, MA 01604, United States | 42.26 | -71.80 |
| 15 | Boynton Family Restaurant | 207 | 117 Highland St, Worcester, MA 01609, United States | 42.27 | -71.81 |
| 16 | Armsby Abbey | 189 | 144 Main St, Worcester, MA 01608, United States | 42.27 | -71.80 |
| 17 | Nuovo | 54 | 92 Shrewsbury St, Worcester, MA 01604, United States | 42.26 | -71.79 |
| 18 | Mi Reyna Latino Tapas & Bar | 6 | 394 Belmont St, Worcester, MA 01604, United States | 42.27 | -71.76 |
| 19 | Livia’s Dish | 35 | 1394 Main St, Worcester, MA 01603, United States | 42.24 | -71.85 |
| 20 | Sol of Mexico | 15 | 538 Pleasant St, Worcester, MA 01602, United States | 42.26 | -71.82 |
| 21 | Eggroll Lady & Fish Shack | 19 | 609 W Boylston St, Worcester, MA 01606, United States | 42.31 | -71.80 |
| 22 | La Scala Ristorante | 6 | 183 Shrewsbury St # 4, Worcester, MA 01604, United States | 42.27 | -71.79 |
| 23 | Pho Dakao | 36 | 593 Park Ave, Worcester, MA 01603, United States | 42.25 | -71.83 |
| 24 | Joey’s Bar & Grill | 50 | 344 Chandler Street, Ste 1, Worcester, MA 01602-3457, United States | 42.26 | -71.83 |
| 25 | Meze Greek Tapas | 24 | 156 Shrewsbury St, Worcester, MA 01604, United States | 42.27 | -71.79 |
| 26 | Shaker’s Cafe and Restaurant | 28 | 296 Hamilton St, Worcester, MA 01604, United States | 42.26 | -71.77 |
| 27 | Newton Square Pizza | 5 | 712 A Pleasant Street, Worcester, MA 01602, United States | 42.27 | -71.83 |
| 28 | Flying Rhino Cafe | 109 | 278 Shrewsbury St, Worcester, MA 01604, United States | 42.27 | -71.78 |
| 29 | Brew City Grill & Brew House | 113 | 104 Shrewsbury St, Worcester, MA 01604, United States | 42.26 | -71.79 |
| 30 | 99 Restaurant & Pub | 39 | 11 East Central Street, Worcester, MA, United States | 42.33 | -71.65 |