# Restaurant with the most reviews in Pittsburgh
# @knitr Packages
library(stringr)
library(dplyr)
library(stringr)
library(readr)
# Read the two input tables from the current working directory.
# pa_reviews supplies the business_id and text columns used below;
# restaurants supplies business_id and name.
pa_reviews <- read_csv(file = "pa_reviews_final_class.csv")
restaurants <- read_csv(file = "restaurant_final_class.csv")
#Building Word Clouds
# Count reviews per business and keep the single most-reviewed one.
# arrange() + slice(1) makes the ranking column explicit (top_n(1) silently
# picks the last column) and guarantees exactly one row even when two
# businesses tie for the highest count.
max_review_restaurant <- pa_reviews %>%
  group_by(business_id) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  slice(1)

# Look the winner up by its computed id instead of a hard-coded one, so the
# script stays correct if the input data changes.
# Result is a one-column tibble holding the restaurant's name.
max_review_restaurant_name <- restaurants %>%
  filter(business_id %in% max_review_restaurant$business_id) %>%
  select(name)

# All reviews written about that restaurant.
max_reviews_data <- pa_reviews %>%
  filter(business_id %in% max_review_restaurant$business_id)
# Sanity check: number of reviews selected for the most-reviewed restaurant.
# The reviews live in a data-frame column, not in files on disk, so there is
# no directory to list: dir() interprets its argument as directory paths and
# therefore always returned character(0) when given the review text.
length(max_reviews_data$text)
#Load the R tm package
library(tm)
# Build the corpus from the reviews of the most-reviewed restaurant only, so
# the word cloud describes that restaurant rather than every review in the
# input file.
docs <- Corpus(VectorSource(max_reviews_data$text))

## Preprocessing
# Convert to lower case. Base functions such as tolower() must be wrapped in
# content_transformer() so tm_map() returns proper tm documents; this also
# removes the need for the old PlainTextDocument re-wrapping workaround.
docs <- tm_map(docs, content_transformer(tolower))
# Remove stopwords while punctuation is still present: the English stopword
# list contains contractions such as "don't" that would no longer match once
# apostrophes have been stripped.
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, removeNumbers)
# Collapse the runs of spaces left behind by the removals above.
docs <- tm_map(docs, stripWhitespace)

# Document Term Matrix (rows = reviews, columns = terms)
dtm <- DocumentTermMatrix(docs)
# Term Document Matrix (rows = terms, columns = reviews)
tdm <- TermDocumentMatrix(docs)

# Compute term frequencies over the whole vocabulary. The previous fixed
# column slice (10000:15000) ignored most terms and fails outright when the
# vocabulary has fewer than 15000 terms. Densifying the matrix is affordable
# here because the corpus is limited to a single restaurant's reviews.
freq <- colSums(as.matrix(dtm))
# Number of distinct terms found in the restaurant's reviews.
length(freq)

# Word frequencies as a data frame: one row per term.
wf <- data.frame(word = names(freq), freq = freq)
library(wordcloud)
# Caption for the plot. paste0() is used because the literals already carry
# their own spaces; paste() would add a second separator space on each side
# of the name. The name is pulled from the tibble's `name` column explicitly
# rather than relying on paste() coercing the whole tibble.
paste0(
  "Reviews about ",
  max_review_restaurant_name$name,
  " has these commonly occuring themes"
)
## [1] "Reviews about Meat & Potatoes has these commonly occuring themes"

# Draw the word cloud: only terms seen at least 50 times, capped at the 100
# most frequent, placed in decreasing order of frequency with 35% of the
# words rotated.
wordcloud(
  words = wf$word,
  freq = wf$freq,
  min.freq = 50,
  max.words = 100,
  random.order = FALSE,
  rot.per = 0.35,
  use.r.layout = FALSE,
  colors = brewer.pal(8, "Dark2")
)
