Natural Language Processing

Getting Started

# install.packages('tm')
# install.packages('twitteR')
# install.packages('wordcloud')
# install.packages('RColorBrewer')
# install.packages('e1071')
# install.packages('class')

Go to apps.twitter.com and get your own token and access keys.

# Attach the packages used below: twitteR for the Twitter API calls, tm for
# the corpus / term-document matrix, wordcloud for plotting and RColorBrewer
# for the colour palette. Each call must be its own statement; several
# library() calls on one line without a separator do not parse.
library(twitteR)
library(tm)
library(wordcloud)
library(RColorBrewer)

## Loading required package: NLP

## Loading required package: RColorBrewer

Connect to Twitter

# Authenticate against the Twitter API. The four credentials (ckey, skey,
# token, sectoken) are strings defined outside this script; substitute the
# values issued for your own Twitter app.
setup_twitter_oauth(
  consumer_key = ckey,
  consumer_secret = skey,
  access_token = token,
  access_secret = sectoken
)

## [1] "Using direct authentication"

# You will probably be prompted to select 1 or 2; choose 1.

# Fetch up to 1000 English-language tweets matching "thanos".
thanos.tweets <- searchTwitter("thanos", n = 1000, lang = "en")

# Pull the text out of every tweet object. vapply() guarantees a character
# vector (character(0) when the search returns nothing), whereas sapply()
# would hand back an empty list in that case.
thanos.text <- vapply(thanos.tweets, function(x) x$getText(), character(1))

# Reduce the text to plain ASCII. With the default `sub = NA`, iconv()
# turns any tweet containing a non-convertible character (emoji, accented
# letters, ...) into NA, discarding the whole tweet; `sub = ""` removes only
# the offending bytes and keeps the rest of the text.
thanos.text <- iconv(thanos.text, "UTF-8", "ASCII", sub = "")

# Wrap the cleaned character vector as a tm corpus, one document per tweet.
thanos.corpus <- Corpus(VectorSource(thanos.text))

# Build a term-document matrix with tm. The control list strips punctuation
# and digits, lower-cases the text, and drops English stop words together
# with the search term itself. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
term.doc.matrix <- TermDocumentMatrix(
  thanos.corpus,
  control = list(
    removePunctuation = TRUE,
    stopwords = c("thanos", stopwords("english")),
    removeNumbers = TRUE,
    tolower = TRUE
  )
)

# Convert to a plain base-R matrix so rowSums() can be applied to it.
term.doc.matrix <- as.matrix(term.doc.matrix)

# Sum each row of the matrix (one row per term) to get total counts, sorted
# with the most frequent term first.
word.freq <- sort(rowSums(term.doc.matrix), decreasing = TRUE)

# Two-column lookup of term and count. stringsAsFactors = FALSE keeps `word`
# a character column on R versions before 4.0 as well.
dm <- data.frame(
  word = names(word.freq),
  freq = word.freq,
  stringsAsFactors = FALSE
)

# Plot the cloud in decreasing-frequency order (not random), coloured with
# the 8-colour "Dark2" palette from RColorBrewer.
wordcloud(
  dm$word,
  dm$freq,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)

## Warning in wordcloud(dm$word, dm$freq, random.order = F, colors =
## brewer.pal(8, : httpstcoqukpectf could not be fit on page. It will not be
## plotted.

Natural Language Processing

Ethan Duan

May 7, 2019

Natural Language Processing

Getting Started