Week 3 R Notebook

Install (if needed) and load the necessary packages

# install.packages('tm')
# install.packages('RColorBrewer')
# install.packages('wordcloud')
library('tm')

## Loading required package: NLP

library('RColorBrewer')
library('wordcloud')

Process data

# Clean raw tweet text for term-frequency analysis.
#
# x: character vector of tweets (one tweet per element).
# Returns a character vector of the same length, lower-cased, with links,
# @mentions, the stand-alone retweet marker "rt", punctuation and digits
# removed, runs of whitespace collapsed to a single space, and leading /
# trailing whitespace trimmed.
clean.text <- function(x) {
  x <- tolower(x)
  # Remove links first, while punctuation still delimits the URL; replace
  # with a space so the neighbouring words are not joined together.
  x <- gsub("http\\S+", " ", x)
  # Remove @mentions (again leaving a separator behind).
  x <- gsub("@\\w+", " ", x)
  # Remove the retweet marker only when it is a whole word, so that words
  # such as "party" or "start" are left intact.
  x <- gsub("\\brt\\b", " ", x)
  # Remove punctuation and digits.
  x <- gsub("[[:punct:]]", "", x)
  x <- gsub("[[:digit:]]", "", x)
  # Collapse any run of whitespace (spaces, tabs, newlines) to one space
  # instead of deleting it, which would glue adjacent words together.
  x <- gsub("[[:space:]]+", " ", x)
  # Strip all leading and trailing whitespace.
  trimws(x)
}

Collect tweets from Twitter API

## [1] "Using direct authentication"

## Warning in doRppAPICall("search/tweets", n, params = params,
## retryOnRateLimit = retryOnRateLimit, : 1000 tweets were requested but the
## API can only return 100

## Warning in wordcloud(head(dm$word, 50), head(dm$freq, 50), random.order
## = FALSE, : eduaubdedubuamark could not be fit on page. It will not be
## plotted.

##              stud          brooklyn eduaubdedubuamark       liubrooklyn 
##                24                22                20                20 
##           clienta               web             litea               day 
##                17                17                16                13 
##               liu        yourrights              game            center 
##                13                13                12                10 
##    neduaubdedubud         steinberg               big              nana 
##                10                10                 8                 8 
##          androida           francis           gameday   neduaubdedububa 
##                 7                 6                 6                 5