This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# Draw the default plot for the `cars` data frame.
plot(x = cars)

#Load all the required packages
library(twitteR)
library(tm)
## Loading required package: NLP
library(wordcloud)
## Loading required package: RColorBrewer
library(RColorBrewer)
# Twitter API credentials are read from environment variables rather than
# being hard-coded: secrets written into a notebook are exposed to everyone
# who can read the file. Define the four variables below in ~/.Renviron (or
# the shell) before running this chunk.
api_key <- Sys.getenv("TWITTER_API_KEY")
api_secret <- Sys.getenv("TWITTER_API_SECRET")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN")
access_token_secret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET")

# Sys.getenv() yields "" for an unset variable; fail early with a clear
# message instead of sending empty credentials to the API.
twitter_credentials <- c(
  TWITTER_API_KEY = api_key,
  TWITTER_API_SECRET = api_secret,
  TWITTER_ACCESS_TOKEN = access_token,
  TWITTER_ACCESS_TOKEN_SECRET = access_token_secret
)
missing_credentials <- names(twitter_credentials)[!nzchar(twitter_credentials)]
if (length(missing_credentials) > 0) {
  stop(
    "Missing Twitter credentials; set environment variable(s): ",
    paste(missing_credentials, collapse = ", "),
    call. = FALSE
  )
}

# Authenticate this R session against the Twitter API.
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
## [1] "Using direct authentication"
# Search Twitter for tweets matching "Superbowl", requesting 500 results.
tweets <- searchTwitter("Superbowl", n = 500)

# Count the tweets that were actually returned.
n.tweets <- length(tweets)
n.tweets
## [1] 500

# Stop with a clear message when the search came back empty: there would be
# nothing to convert or to plot further down.
if (n.tweets == 0) {
  stop("searchTwitter() returned no tweets for \"Superbowl\"", call. = FALSE)
}

# Convert the list of tweets into a data frame.
tweets.df <- twListToDF(tweets)

# View() opens a data viewer, which only makes sense in an interactive
# session; skip it when the notebook is rendered non-interactively.
if (interactive()) {
  View(tweets.df)
}

# Clean the tweet text and draw a word cloud of the remaining terms.
library(tm)
library(wordcloud)

# Build a corpus with one document per tweet.
mycorpus <- Corpus(VectorSource(tweets.df$text))

# Strip URLs first, while the punctuation that delimits them is still present.
remove_url <- function(x) gsub("http[^[:space:]]*", "", x)
mycorpus <- tm_map(mycorpus, content_transformer(remove_url))

# Lower-case BEFORE removing stop words: the stop word list is lower case and
# removeWords matches case-sensitively, so capitalised stop words ("The",
# "And", ...) would otherwise survive and dominate the cloud.
mycorpus <- tm_map(mycorpus, content_transformer(tolower))

# Remove stop words while apostrophes are still intact, so that contractions
# such as "don't" still match their entry in the stop word list.
mycorpus <- tm_map(mycorpus, removeWords, stopwords())

# Remove everything that is neither a letter nor whitespace
# (digits, punctuation, symbols).
removeNumPunct <- function(x) gsub("[^[:alpha:][:space:]]+", "", x)
mycorpus <- tm_map(mycorpus, content_transformer(removeNumPunct))

# Collapse the runs of whitespace left behind by the removals above.
mycorpus <- tm_map(mycorpus, stripWhitespace)
# mycorpus <- tm_map(mycorpus, stemDocument)

# dtm <- DocumentTermMatrix(mycorpus)

# Plot only terms that occur at least 5 times.
wordcloud(mycorpus, min.freq = 5)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

Load all the required packages

library(twitteR)
library(tm)
library(wordcloud)
library(RColorBrewer)

# Twitter API credentials are read from environment variables rather than
# being hard-coded: secrets written into a notebook are exposed to everyone
# who can read the file. Define the four variables below in ~/.Renviron (or
# the shell) before running this code.
api_key <- Sys.getenv("TWITTER_API_KEY")
api_secret <- Sys.getenv("TWITTER_API_SECRET")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN")
access_token_secret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET")

# Sys.getenv() yields "" for an unset variable; fail early with a clear
# message instead of sending empty credentials to the API.
if (!all(nzchar(c(api_key, api_secret, access_token, access_token_secret)))) {
  stop(
    "Missing Twitter credentials; set TWITTER_API_KEY, TWITTER_API_SECRET, ",
    "TWITTER_ACCESS_TOKEN and TWITTER_ACCESS_TOKEN_SECRET",
    call. = FALSE
  )
}

# Authenticate this R session against the Twitter API.
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)

Let’s get some tweets in English containing the words “machine learning”.

mach_tweets = searchTwitter("machine learning", n=500, lang="en")

Extract the text from the tweets in a vector

mach_text = sapply(mach_tweets, function(x) x$getText())

create a corpus

mach_corpus = Corpus(VectorSource(mach_text))

create document term matrix applying some transformations

tdm = TermDocumentMatrix(mach_corpus,
  control = list(removePunctuation = TRUE,
                 stopwords = c("machine", "learning", stopwords("english")),
                 removeNumbers = TRUE,
                 tolower = TRUE))
# define tdm as matrix
m = as.matrix(tdm)
# get word counts in decreasing order
word_freqs = sort(rowSums(m), decreasing=TRUE)
# create a data frame with words and their frequencies
dm = data.frame(word=names(word_freqs), freq=word_freqs)

plot wordcloud

wordcloud(dm$word, dm$freq, random.order=FALSE, colors=brewer.pal(8, "Dark2"))

save the image in png format

png("MachineLearningCloud.png", width=12, height=8, units="in", res=300)
wordcloud(dm$word, dm$freq, random.order=FALSE, colors=brewer.pal(8, "Dark2"))
dev.off()

load library

library(twitteR)
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
library('tm')
library('RColorBrewer')
library('wordcloud')