#load libraries
library(tm)
## Warning: package 'tm' was built under R version 3.4.2
## Loading required package: NLP
library(qdap)
## Warning: package 'qdap' was built under R version 3.4.2
## Loading required package: qdapDictionaries
## Loading required package: qdapRegex
## Warning: package 'qdapRegex' was built under R version 3.4.2
## Loading required package: qdapTools
## Warning: package 'qdapTools' was built under R version 3.4.2
## Loading required package: RColorBrewer
## 
## Attaching package: 'qdap'
## The following objects are masked from 'package:tm':
## 
##     as.DocumentTermMatrix, as.TermDocumentMatrix
## The following object is masked from 'package:NLP':
## 
##     ngrams
## The following object is masked from 'package:base':
## 
##     Filter
library(RWeka)
## Warning: package 'RWeka' was built under R version 3.4.2
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.4.2
# Read the incident tickets, keeping strings as character vectors (not factors)
a <- read.csv("c:\\incident\\ticket.csv", stringsAsFactors = FALSE)

# Fail fast when the expected column is absent: `a$text` would otherwise
# partial-match a similarly named column or return NULL, which silently
# produces an empty corpus further down.
if (!"text" %in% names(a)) {
  stop("ticket.csv does not contain a 'text' column", call. = FALSE)
}
tickets <- a[["text"]]

# Wrap the character vector in a tm Source object
comments <- VectorSource(tickets)

# Make a volatile corpus
comments_corpus <- VCorpus(comments)

# Normalise a tm corpus before building n-gram term matrices.
#
# Args:
#   corpus:          a tm corpus (e.g. the result of VCorpus()).
#   extra_stopwords: character vector of additional words to drop together
#                    with the English stop-word list; defaults to the
#                    ticket-specific filler words used by this script.
#
# Returns:
#   The corpus after lower-casing, stop-word removal, punctuation removal
#   and whitespace collapsing.
clean_corpus <- function(
    corpus,
    extra_stopwords = c("informed", "went", "place", "one", "noticed", "reason")) {
  # Lower-case first so stop-word matching is case-insensitive
  corpus <- tm_map(corpus, content_transformer(tolower))
  # Remove stop words while apostrophes are still present: stopwords("en")
  # contains contractions such as "don't", which would no longer match once
  # punctuation removal has turned them into "dont".
  corpus <- tm_map(corpus, removeWords, c(stopwords("en"), extra_stopwords))
  corpus <- tm_map(corpus, removePunctuation)
  # Collapse whitespace last so the gaps left by removed words and
  # punctuation are squeezed as well
  tm_map(corpus, stripWhitespace)
}

# Run the cleaning pipeline over the ticket corpus
clean_corp <- clean_corpus(comments_corpus)
# Bigram tokenizer: asks RWeka for n-grams of exactly two words (min = max = 2)
tokenizer <- function(x) {
  bigram_opts <- Weka_control(min = 2, max = 2)
  NGramTokenizer(x, control = bigram_opts)
}

# Create bigram_dtm: document-term matrix whose terms come from `tokenizer`
bigram_dtm <- DocumentTermMatrix(
  clean_corp,
  control = list(tokenize = tokenizer)
)

# Create bigram_dtm_m (dense matrix form of the DTM)
bigram_dtm_m <- as.matrix(bigram_dtm)

# Create freq: total count of each bigram across all documents
freq <- colSums(bigram_dtm_m)

# Create bi_words
bi_words <- names(freq)


# Plot a wordcloud
# With the default scale the previous run warned that "performance bad"
# "could not be fit on page" and was not plotted. Bigrams are long, so the
# maximum font scale is reduced to give frequent terms a better chance to fit.
wordcloud(bi_words, freq, scale = c(3, 0.5), max.words = 100)