Analyzing customer reviews of a product with text mining to find the most frequent words and identify trends.

library("tm")
## Warning: package 'tm' was built under R version 4.1.2
## Loading required package: NLP
## Warning: package 'NLP' was built under R version 4.1.1
library("SnowballC")
## Warning: package 'SnowballC' was built under R version 4.1.1
library("wordcloud")
## Warning: package 'wordcloud' was built under R version 4.1.2
## Loading required package: RColorBrewer
## Warning: package 'RColorBrewer' was built under R version 4.1.1
library("RColorBrewer")
# Read the review file into a character vector, one element per line
text <- readLines(con = "C:/Users/Punalur/Documents/abc.txt")
# Preview the first three entries
head(x = text, n = 3)
## [1] "This case serves it's function well - absorbs falls, it's grippy soft (but hard) and holds the phone in place well. This is the second one of these I've purchased, so I must be satisfied. But it also means the last one eventually broke. I can say I used the last one over a year and dropped my phone several times, never taking any damage. The front edge is raised just enough to protect the screen. Eventually, the bottom corner of the case cracked and, although it never completely separated, I figured I'd change the case before pushing my luck further."
## [2] "I would have bought the exact same red one, but color options are dwindling. I settled for mint and it was a little lighter than I expected. I'd call the color \"robin egg\", not mint. M'eh"                                                                                                                                                                                                                                                                                                                                                                               
## [3] "Great little phone case, not too bulky and the colour is great! The only things I'm not thrilled about: it's a very slippery phone case and the case doesn't fully cover the bottom of the phone, so if you drop it you could still damage your phone. Otherwise, super cute!"
# Wrap the review lines in a corpus (one document per input line)
document <- Corpus(VectorSource(text))
# Clean the text. Order matters: stopwords are removed BEFORE punctuation so
# that contractions in the stopword list (e.g. "it's") still match.
document <- tm_map(document, content_transformer(tolower)) # change to lowercase
## Warning in tm_map.SimpleCorpus(document, content_transformer(tolower)):
## transformation drops documents
document <- tm_map(document, removeNumbers) # remove numbers
## Warning in tm_map.SimpleCorpus(document, removeNumbers): transformation drops
## documents
document <- tm_map(document, removeWords, stopwords("english")) # remove stopwords in English
## Warning in tm_map.SimpleCorpus(document, removeWords, stopwords("english")):
## transformation drops documents
document <- tm_map(document, removeWords, c("also")) # specify own stopwords to be removed
## Warning in tm_map.SimpleCorpus(document, removeWords, c("also")): transformation
## drops documents
# Replace "/" with a space so joined alternatives such as "sturdy/strong" are
# split into separate words instead of being fused by punctuation removal.
slash_to_space <- content_transformer(
  function(x) gsub("/", " ", x, fixed = TRUE)
)
document <- tm_map(document, slash_to_space)
# Strip punctuation so that "case", "case," and "case." count as one term.
document <- tm_map(document, removePunctuation)
# Collapse the runs of whitespace left behind by the removals above.
document <- tm_map(document, stripWhitespace)
# NOTE: the recorded outputs further down (frequency table, associations,
# frequent terms) were captured before the last three steps were added, so a
# re-run will produce different terms and counts.
# Build a table with the words and their frequencies
dtm <- TermDocumentMatrix(document)
# Sum each term's counts directly on the sparse matrix. Converting with
# as.matrix() first materialises a dense terms x documents matrix, which is
# what failed with "cannot allocate vector of size 39.1 Gb" on a large corpus.
# slam is the sparse-matrix package tm is built on, so it is already
# installed; it is called with `::` because it is not attached here.
# The dense intermediate `m` is intentionally no longer created.
v <- sort(slam::row_sums(dtm), decreasing = TRUE)
# One row per term, most frequent first; row names are the terms themselves.
D <- data.frame(word = names(v), frequency = v)
head(D, 10)
##            word frequency
## case       case        15
## phone     phone         9
## colour   colour         7
## nice       nice         7
## got         got         5
## enough   enough         4
## just       just         4
## protect protect         4
## well       well         4
## little   little         4
# Render a word cloud of at most 20 terms, most frequent placed first.
# The seed is fixed before drawing so repeated runs start from the same
# random state.
set.seed(1234)
with(
  D,
  wordcloud(
    words = word,
    freq = frequency,
    min.freq = 1,
    max.words = 20,
    random.order = FALSE,
    rot.per = 0.05,
    colors = brewer.pal(8, "Dark2")
  )
)

# Examine term associations: list the terms whose correlation with "phone"
# meets the 0.2 lower limit.
findAssocs(x = dtm, terms = "phone", corlimit = 0.2)
## $phone
##          case        bottom         well.         super       protect 
##          0.73          0.67          0.67          0.67          0.46 
##       absorbs      although        broke.        change    completely 
##          0.45          0.45          0.45          0.45          0.45 
##        corner       damage.          edge    eventually   eventually, 
##          0.45          0.45          0.45          0.45          0.45 
##        falls,       figured         front      function        grippy 
##          0.45          0.45          0.45          0.45          0.45 
##         hard)         holds          last          luck         means 
##          0.45          0.45          0.45          0.45          0.45 
##          must         never           one         place    purchased, 
##          0.45          0.45          0.45          0.45          0.45 
##       pushing        raised    satisfied.           say       screen. 
##          0.45          0.45          0.45          0.45          0.45 
##        second    separated,        serves       several          soft 
##          0.45          0.45          0.45          0.45          0.45 
##        taking          used          year         bulky         cover 
##          0.45          0.45          0.45          0.45          0.45 
##         cute!        damage          drop         fully         great 
##          0.45          0.45          0.45          0.45          0.45 
##        great!        phone.         still        things      thrilled 
##          0.45          0.45          0.45          0.45          0.45 
##        (clove    absolutely       amazon.       another        better 
##          0.45          0.45          0.45          0.45          0.45 
##    fantastic!          feel      justice.       person,        photos 
##          0.45          0.45          0.45          0.45          0.45 
##      pictured       purple)       purple.       quality      quality! 
##          0.45          0.45          0.45          0.45          0.45 
##      sturdier sturdy/strong           way           yet       dropped 
##          0.45          0.45          0.45          0.45          0.39 
##        enough        times,        phone,      slippery       exactly 
##          0.39          0.39          0.39          0.39          0.39 
##         feels       cracked      silicone         light        colour 
##          0.39          0.27          0.27          0.27          0.22
# List the terms that occur at least twice in the term-document matrix
findFreqTerms(x = dtm, lowfreq = 2)
##  [1] "bottom"     "can"        "case"       "cracked"    "dropped"   
##  [6] "enough"     "just"       "last"       "never"      "one"       
## [11] "phone"      "protect"    "times,"     "well"       "well."     
## [16] "color"      "little"     "mint"       "case,"      "colour"    
## [21] "otherwise," "phone,"     "slippery"   "super"      "case."     
## [26] "got"        "nice"       "really"     "silicone"   "exactly"   
## [31] "feels"      "like"       "love"       "protection" "orange"    
## [36] "red."       "good"       "slip"       "light"      "pretty"    
## [41] "screen"
# Plot the most frequent words (up to 10) as a bar chart.
# head() is used instead of D[1:10, ] so that a vocabulary with fewer than
# 10 terms does not introduce NA rows (and therefore bogus bars) into the plot.
top_words <- head(D, 10)
barplot(top_words$frequency, las = 2, names.arg = top_words$word,
        col = "lightblue", main = "Most frequent words",
        ylab = "Word frequencies")