Restaurant Review Word Cloud

Restaurant with the most reviews in Pittsburgh

# @knitr Packages

library(stringr)
library(dplyr)
library(stringr)
library(readr)

# Load the review-level and restaurant-level data sets.
pa_reviews <- read_csv("pa_reviews_final_class.csv")
restaurants <- read_csv("restaurant_final_class.csv")

# Building Word Clouds

# Count the reviews per business and keep the business with the most reviews.
# Naming the weighting column explicitly avoids top_n()'s "Selecting by n"
# message.
max_review_restaurant <- pa_reviews %>%
  group_by(business_id) %>%
  summarise(n = n()) %>%
  top_n(1, n)

# Take the ID from the computed result instead of hard-coding it, so the
# analysis stays correct if the input data changes. top_n() keeps ties, so
# use the first row to guarantee a single business.
max_review_business_id <- max_review_restaurant$business_id[1]

# One-column data frame holding the restaurant's display name.
max_review_restaurant_name <- restaurants %>%
  filter(business_id == max_review_business_id) %>%
  select(name)

# All reviews written for that restaurant.
max_reviews_data <- pa_reviews %>%
  filter(business_id == max_review_business_id)
######
# Sanity check: make sure reviews were found for the selected restaurant
# before continuing.
# dir() lists the files found under the given *paths*, so calling it on the
# review text itself only ever returned character(0) and verified nothing.
######

stopifnot(length(max_reviews_data$text) > 0)

# Load the R tm package
library(tm)

# Build the corpus from the selected restaurant's reviews only: the word cloud
# is captioned as describing that restaurant, so it must not be built from
# every review in pa_reviews.
docs <- Corpus(VectorSource(max_reviews_data$text))

## Preprocessing
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, removeNumbers)

# Converting to lower case
# content_transformer() lets a plain character function such as tolower() be
# applied without breaking the corpus structure, so the documents no longer
# need to be re-wrapped with PlainTextDocument afterwards.
docs <- tm_map(docs, content_transformer(tolower))

# Remove stopwords
docs <- tm_map(docs, removeWords, stopwords("english"))

# Collapse the runs of whitespace left behind by the removals above
docs <- tm_map(docs, stripWhitespace)

# Document Term Matrix (documents in rows, terms in columns)
dtm <- DocumentTermMatrix(docs)

# Term Document Matrix (terms in rows, documents in columns)
tdm <- TermDocumentMatrix(docs)

# Compute Term Frequencies
# Sum over every term in the matrix. Selecting a fixed column range such as
# 10000:15000 counts an arbitrary slice of the vocabulary and fails with a
# subscript error when the corpus has fewer terms than the upper bound.
# NOTE(review): as.matrix() creates a dense matrix; this assumes a single
# restaurant's reviews are small enough to fit in memory - confirm.
freq <- colSums(as.matrix(dtm))

# Number of distinct terms in the corpus
length(freq)

# word frequencies
# Keep the words as character strings rather than factors on older R versions.
wf <- data.frame(word = names(freq), freq = freq, stringsAsFactors = FALSE)
library(wordcloud)

# Caption for the word cloud. paste0() is used because paste() inserts its own
# " " separator on top of the spaces already present in the literals, which
# produced doubled spaces in the rendered sentence.
paste0(
  "Reviews about ", max_review_restaurant_name$name,
  " have these commonly occurring themes"
)

## [1] "Reviews about Meat & Potatoes have these commonly occurring themes"

# Draw at most 100 words that occur at least 50 times, placing the most
# frequent words first (random.order = FALSE) and rotating 35% of the words.
wordcloud(
  words = wf$word,
  freq = wf$freq,
  min.freq = 50,
  max.words = 100,
  random.order = FALSE,
  rot.per = 0.35,
  use.r.layout = FALSE,
  colors = brewer.pal(8, "Dark2")
)

Restaurant Review Word Cloud

Group 5

2016-12-03

Restaurant with the most reviews in Pittsburgh