References: <http://www.sthda.com/english/wiki/text-mining-and-word-cloud-fundamentals-in-r-5-simple-steps-you-should-know>, <https://www.tidytextmining.com/nasa.html#word-co-ocurrences-and-correlations>

library(tm)
## Loading required package: NLP
library(SnowballC)
library(wordcloud)
## Loading required package: RColorBrewer
library(RColorBrewer)
library(stringr)




# Load the job postings export from the working directory into a data frame.
jobs <- read.csv(
  file = "data_scientist_united_states_job_postings_jobspikr.csv"
)

# Preview the first rows to check the columns that were read.
head(x = jobs)
# Reduce the posting text to printable ASCII (\x20-\x7E).
# Line breaks, tabs and other whitespace control characters lie outside that
# range, so deleting them outright would glue the last word of one line to the
# first word of the next (e.g. "Python\nSQL" -> "PythonSQL"). Turn them into
# spaces first, then drop every remaining non-printable / non-ASCII character.
jobs$job_description <- gsub("[\t\n\r\f\v]", " ", jobs$job_description)
jobs$job_description <- gsub("[^\x20-\x7E]", "", jobs$job_description)



# Build a tm corpus from the job description column; VectorSource supplies the
# elements of the character vector to Corpus().
descriptions <- Corpus(x = VectorSource(x = jobs[["job_description"]]))




# tm transformation that replaces every match of the regular expression
# `pattern` in the text `x` with a single space.
toSpace <- content_transformer(
  function(x, pattern) {
    gsub(pattern = pattern, replacement = " ", x = x)
  }
)
# NOTE: when this script was run, every tm_map() call in this section emitted
# "Warning in tm_map.SimpleCorpus(...): transformation drops documents".

# Replace separator characters with spaces, one tm_map() pass per pattern and
# in this order. The patterns are regular expressions, hence the escaped pipe.
descriptions <- Reduce(
  function(corpus, separator) tm_map(corpus, toSpace, separator),
  c("/", "\n", "@", "\\|"),
  descriptions
)

# Convert the text to lower case.
descriptions <- tm_map(descriptions, content_transformer(tolower))

# Remove English stop words (done before punctuation is stripped).
descriptions <- tm_map(descriptions, removeWords, stopwords(kind = "english"))

# Strip punctuation.
descriptions <- tm_map(descriptions, removePunctuation)

# Collapse the extra whitespace left behind by the steps above.
descriptions <- tm_map(descriptions, stripWhitespace)

# Optional stemming step (currently disabled): reduces common word endings
# (ing, es, s).
# descriptions <- tm_map(descriptions, stemDocument)


# Term-document matrix of the cleaned corpus (terms in rows).
dtm <- TermDocumentMatrix(x = descriptions)

# Dense copy of the matrix so that rowSums() can be applied to it.
m <- as.matrix(dtm)

# Total count of each term across all documents, most frequent first.
v <- sort(rowSums(x = m), decreasing = TRUE)

# Frequency table: one row per term with its overall count.
d <- data.frame(
  word = names(v),
  freq = v
)

This is what the word-frequency table looks like:

# Show the frequency table.
d
# Set up the word cloud.
# Fix the RNG seed first so repeated runs start from the same random state.
set.seed(seed = 1234)
wordcloud(
  words = d$word,
  freq = d$freq,
  min.freq = 100,        # lower frequency bound for plotted words
  max.words = 200,       # upper bound on the number of words plotted
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(n = 8, name = "Dark2")
)