# Based on:
# https://sites.google.com/site/miningtwitter/questions/talking-about/wordclouds/wordcloud1

# Load all the required packages
library(twitteR)
library(tm)
library(wordcloud)
library(RColorBrewer)
library(stringi)

## Should be done once for setup, info: http://thinktostart.com/twitter-authentification-with-r/
# Fill in the four credentials of your Twitter application below.
# Avoid committing real values to version control.
api_key <- ""
api_secret <- ""
access_token <- ""
access_token_secret <- ""
# setup_twitter_oauth() expects the consumer (API) key/secret first and the
# access token/secret afterwards; the arguments are named here so the
# credentials cannot end up in the wrong slots.
setup_twitter_oauth(
  consumer_key = api_key,
  consumer_secret = api_secret,
  access_token = access_token,
  access_secret = access_token_secret
)

# Let's get some tweets in English matching the query "R data visualization"
tsne_tweets <- searchTwitter("R data visualization", n = 1000, lang = "en")
# Fail early with a clear message instead of an obscure error further down
# when the search yields nothing.
if (length(tsne_tweets) == 0) {
  stop("Twitter search returned no tweets; nothing to plot.", call. = FALSE)
}

# Extract the text from the tweets in a character vector.
# vapply() guarantees a character vector (one string per tweet), whereas
# sapply() would silently return a list for empty or irregular input.
tsne_text <- vapply(tsne_tweets, function(x) x$getText(), character(1))
# Remove URLs only: match from "http://" or "https://" up to the next
# whitespace, so that any words following a link are kept in the tweet.
tsne_text <- gsub("https?://[^[:space:]]*", "", tsne_text)
# Transliterate accented Latin characters to their ASCII counterparts.
tsne_text <- stringi::stri_trans_general(tsne_text, "latin-ascii")

# Build a corpus with one document per cleaned tweet
tsne_corpus <- Corpus(VectorSource(tsne_text))

# Preprocessing options for the term-document matrix.
# NOTE: the options are kept in their original order on purpose; tm's
# termFreq() documentation states that some of them are applied in the
# order in which they are listed.
tdm_control <- list(
  removePunctuation = TRUE,
  stopwords = stopwords("english"),
  removeNumbers = TRUE,
  tolower = TRUE
)
tdm <- TermDocumentMatrix(tsne_corpus, control = tdm_control)

# Convert to an ordinary matrix so the rows can be summed per term
m <- as.matrix(tdm)
# Total count of each term, most frequent first
word_freqs <- sort(rowSums(m), decreasing = TRUE)
# Data frame pairing each term with its total count
dm <- data.frame(word = names(word_freqs), freq = word_freqs)

# Draw the word cloud using the 8-colour "Dark2" palette, placing words in
# decreasing order of frequency rather than at random
cloud_colors <- brewer.pal(8, "Dark2")
wordcloud(
  words = dm$word,
  freq = dm$freq,
  random.order = FALSE,
  colors = cloud_colors
)