Loading the data

# Read the raw hotel reviews. Text columns are kept as character vectors
# instead of being turned into factors while loading.
reviews <- read.csv(
  file = "Hotel_Reviews.csv",
  stringsAsFactors = FALSE
)

Stripping leading and trailing whitespace from Reviewer_Nationality

library(stringr)
# Drop whitespace on both ends of each nationality so that later equality
# comparisons against a plain country name can match.
reviews$Reviewer_Nationality <- str_trim(
  string = reviews$Reviewer_Nationality,
  side = "both"
)

Converting the reviewers’ nationality into a factor

# Store nationality as a categorical variable (one level per distinct value).
reviews[["Reviewer_Nationality"]] <- as.factor(
  reviews[["Reviewer_Nationality"]]
)

Filtering the required dataset

library(dplyr)
# Keep the rows whose reviewer nationality is exactly "Singapore" and whose
# negative review is anything other than the literal text "No Negative".
sg_reviews <- filter(
  reviews,
  Reviewer_Nationality == "Singapore",
  Negative_Review != "No Negative"
)

Building a corpus from the negative reviews and pre-processing it

library(tm)
# (loading tm also reports: Loading required package: NLP)

# Build one corpus document per negative review and clean it in a single
# pipeline. The order of the steps matters and is kept as-is: stop words are
# removed before punctuation is stripped, and the stray token "t" is removed
# as its own pass after the stop words.
neg.corpus <- sg_reviews$Negative_Review %>%
  VectorSource() %>%
  VCorpus() %>%
  tm_map(PlainTextDocument) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(removeWords, "t") %>%
  # Collapse the plural so that "rooms" and "room" are counted together.
  tm_map(content_transformer(function(txt) gsub("rooms", "room", txt))) %>%
  tm_map(removePunctuation) %>%
  tm_map(removeNumbers) %>%
  tm_map(stripWhitespace)

Converting the corpus into a data frame

library(RWeka)
# Flatten the cleaned corpus into a one-column data frame of review texts.
# content() is used to pull the text out of each document explicitly; lapply()
# always returns a list, so the result type does not depend on the input the
# way sapply() simplification does. Names are dropped so the data frame gets
# plain integer row names.
sg_df <- data.frame(
  text = unlist(lapply(neg.corpus, content), use.names = FALSE),
  stringsAsFactors = FALSE
)

Creating bigrams

# Characters that separate tokens. The backslashes are doubled on purpose, so
# the tokenizer receives the two-character sequences \t, \r and \n rather than
# actual control characters.
token_delim <- " \\t\\r\\n.!?,;\"()"

# Tokenize the text column itself. Handing over the whole data frame makes the
# tokenizer work on its character coercion (a single deparsed `c("...", ...)`
# string), which adds a spurious "c" token and produces bigrams that span two
# different reviews.
bitoken <- NGramTokenizer(
  sg_df$text,
  Weka_control(min = 2, max = 2, delimiters = token_delim)
)

# table() labels its dimension after the variable it is given, so the data
# frame has the columns `bitoken` and `Freq`.
two_word <- data.frame(table(bitoken))

# Most frequent bigrams first.
sort_two <- two_word[order(two_word$Freq, decreasing = TRUE), ]

Visualizing the most frequent bigrams in the negative reviews

library(wordcloud)
# (loading wordcloud also reports: Loading required package: RColorBrewer)

# Draw a word cloud of the bigrams: at most 100 entries, each occurring at
# least 10 times, coloured with the 8-colour "Dark2" palette.
wordcloud(
  words = sort_two$bitoken,
  freq = sort_two$Freq,
  scale = c(3, 0.5),
  min.freq = 10,
  max.words = 100,
  colors = brewer.pal(n = 8, name = "Dark2")
)