###################################
#### TWEET TEXT ANALYSIS ##########
###################################
# Load the exported tweets from the working directory. The tweet body lives
# in the `text` column (see the recorded str() output below).
# stringsAsFactors = FALSE is spelled out so that `text` is a character
# vector on R < 4.0 as well; from R 4.0 onwards this is already the default.
apple <- read.csv("apple.csv", stringsAsFactors = FALSE)
# Show the column types and a short preview of every column.
str(apple)
## 'data.frame': 1000 obs. of 16 variables:
## $ text : chr "RT @option_snipper: $AAPL beat on both eps and revenues. SEES 4Q REV. $49B-$52B, EST. $49.1B https://t.co/hfHXqj0IOB" "RT @option_snipper: $AAPL beat on both eps and revenues. SEES 4Q REV. $49B-$52B, EST. $49.1B https://t.co/hfHXqj0IOB" "Let's see this break all timers. $AAPL 156.89" "RT @SylvaCap: Things might get ugly for $aapl with the iphone delay. With $aapl down that means almost all of t"| __truncated__ ...
## $ favorited : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ favoriteCount: int 0 0 0 0 0 0 0 0 0 0 ...
## $ replyToSN : chr NA NA NA NA ...
## $ created : chr "2017-08-01 20:31:56" "2017-08-01 20:31:55" "2017-08-01 20:31:55" "2017-08-01 20:31:55" ...
## $ truncated : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ replyToSID : num NA NA NA NA NA NA NA NA NA NA ...
## $ id : num 8.92e+17 8.92e+17 8.92e+17 8.92e+17 8.92e+17 ...
## $ replyToUID : num NA NA NA NA NA NA NA NA NA NA ...
## $ statusSource : chr "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>" "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>" "<a href=\"http://stocktwits.com\" rel=\"nofollow\">StockTwits Web</a>" "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>" ...
## $ screenName : chr "KnowledgeMC" "Migcortina" "beckyhiu" "MarveiTheBoxer" ...
## $ retweetCount : int 3 3 0 85 0 30 30 9 10 1 ...
## $ isRetweet : logi TRUE TRUE FALSE TRUE FALSE TRUE ...
## $ retweeted : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ longitude : logi NA NA NA NA NA NA ...
## $ latitude : logi NA NA NA NA NA NA ...
library(tm)
## Warning: package 'tm' was built under R version 4.0.5
## Loading required package: NLP
# Normalise the tweet text to UTF-8 and wrap it in a tm corpus.
#
# `from` is given explicitly: when it is omitted iconv() assumes the session's
# native encoding, and on a non-UTF-8 session multi-byte characters are
# re-encoded byte by byte (the garbled ellipsis in the recorded previews
# further down is the result of that).
# `sub = ""` drops bytes that are not valid in the source encoding instead of
# turning the whole tweet into NA.
# NOTE(review): assumes apple.csv is stored as UTF-8, which is what the
# recorded garbling pattern suggests -- confirm against the export.
corpus <- iconv(apple$text, from = "UTF-8", to = "UTF-8", sub = "")
# One document per tweet.
corpus <- Corpus(VectorSource(corpus))
# Preview the first five documents.
inspect(corpus[1:5])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] RT @option_snipper: $AAPL beat on both eps and revenues. SEES 4Q REV. $49B-$52B, EST. $49.1B https://t.co/hfHXqj0IOB
## [2] RT @option_snipper: $AAPL beat on both eps and revenues. SEES 4Q REV. $49B-$52B, EST. $49.1B https://t.co/hfHXqj0IOB
## [3] Let's see this break all timers. $AAPL 156.89
## [4] RT @SylvaCap: Things might get ugly for $aapl with the iphone delay. With $aapl down that means almost all of the FANG stocks were down pos…
## [5] $AAPL - wow! This was supposed to be a throw-away quarter and AAPL beats by over 500 million in revenue! Trillion dollar company by 2018!
# Lower-case every document. tolower() is a base function rather than a tm
# transformation, so it is wrapped in content_transformer(), the same way the
# removeURL step of this script is applied.
corpus <- tm_map(corpus, content_transformer(tolower))
# The `##` line below is output recorded from a run that passed `tolower`
# unwrapped.
# NOTE(review): the warning appears on every tm_map() call in this script
# while the previews still list all five documents -- presumably spurious;
# confirm against the installed tm version.
## Warning in tm_map.SimpleCorpus(corpus, tolower): transformation drops documents
inspect(corpus[1:5])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt @option_snipper: $aapl beat on both eps and revenues. sees 4q rev. $49b-$52b, est. $49.1b https://t.co/hfhxqj0iob
## [2] rt @option_snipper: $aapl beat on both eps and revenues. sees 4q rev. $49b-$52b, est. $49.1b https://t.co/hfhxqj0iob
## [3] let's see this break all timers. $aapl 156.89
## [4] rt @sylvacap: things might get ugly for $aapl with the iphone delay. with $aapl down that means almost all of the fang stocks were down posã¢â‚¬â¦
## [5] $aapl - wow! this was supposed to be a throw-away quarter and aapl beats by over 500 million in revenue! trillion dollar company by 2018!
# Delete punctuation from every document.
# As the recorded preview shows, this also fuses handles, tickers and URLs
# into single tokens (e.g. the t.co link becomes one "httpstco..." word).
corpus <- tm_map(corpus, removePunctuation)
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
# Preview the first five documents after the step.
inspect(corpus[seq_len(5)])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt optionsnipper aapl beat on both eps and revenues sees 4q rev 49b52b est 491b httpstcohfhxqj0iob
## [2] rt optionsnipper aapl beat on both eps and revenues sees 4q rev 49b52b est 491b httpstcohfhxqj0iob
## [3] lets see this break all timers aapl 15689
## [4] rt sylvacap things might get ugly for aapl with the iphone delay with aapl down that means almost all of the fang stocks were down posã¢â‚¬â¦
## [5] aapl wow this was supposed to be a throwaway quarter and aapl beats by over 500 million in revenue trillion dollar company by 2018
# Delete digits from every document. Digits embedded in a token are removed
# too, so "4q" is reduced to "q" in the recorded preview.
corpus <- tm_map(corpus, removeNumbers)
## Warning in tm_map.SimpleCorpus(corpus, removeNumbers): transformation drops
## documents
# Preview the first five documents after the step.
inspect(corpus[seq_len(5)])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt optionsnipper aapl beat on both eps and revenues sees q rev bb est b httpstcohfhxqjiob
## [2] rt optionsnipper aapl beat on both eps and revenues sees q rev bb est b httpstcohfhxqjiob
## [3] lets see this break all timers aapl
## [4] rt sylvacap things might get ugly for aapl with the iphone delay with aapl down that means almost all of the fang stocks were down posã¢â‚¬â¦
## [5] aapl wow this was supposed to be a throwaway quarter and aapl beats by over million in revenue trillion dollar company by
# Remove tm's English stop-word list. The result is stored under a new name,
# `cleanset`, so `corpus` keeps the text as it was before this step.
cleanset <- tm_map(corpus, removeWords, stopwords("english"))
## Warning in tm_map.SimpleCorpus(corpus, removeWords, stopwords("english")):
## transformation drops documents
# Preview the first five documents after the step.
inspect(cleanset[seq_len(5)])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt optionsnipper aapl beat eps revenues sees q rev bb est b httpstcohfhxqjiob
## [2] rt optionsnipper aapl beat eps revenues sees q rev bb est b httpstcohfhxqjiob
## [3] lets see break timers aapl
## [4] rt sylvacap things might get ugly aapl iphone delay aapl means almost fang stocks posã¢â‚¬â¦
## [5] aapl wow supposed throwaway quarter aapl beats million revenue trillion dollar company
# Remove URL tokens from text.
#
# x: character vector of document text.
# Returns `x` with every run that starts at "http" and extends up to (not
# including) the next whitespace character deleted. Matching up to whitespace
# rather than only over alphanumerics means a URL is removed completely both
# when punctuation has already been stripped ("httpstcoabc") and when it is
# still intact ("https://t.co/abc").
removeURL <- function(x) {
  gsub("http[^[:space:]]*", "", x)
}
# Apply removeURL() to every document. It is a plain function rather than a
# tm transformation, hence the content_transformer() wrapper.
cleanset <- tm_map(cleanset, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(cleanset, content_transformer(removeURL)):
## transformation drops documents
# Preview the first five documents after the step.
inspect(cleanset[seq_len(5)])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt optionsnipper aapl beat eps revenues sees q rev bb est b
## [2] rt optionsnipper aapl beat eps revenues sees q rev bb est b
## [3] lets see break timers aapl
## [4] rt sylvacap things might get ugly aapl iphone delay aapl means almost fang stocks posã¢â‚¬â¦
## [5] aapl wow supposed throwaway quarter aapl beats million revenue trillion dollar company
# Drop the two terms "aapl" and "apple" from every document.
cleanset <- tm_map(cleanset, removeWords, c("aapl", "apple"))
## Warning in tm_map.SimpleCorpus(cleanset, removeWords, c("aapl", "apple")):
## transformation drops documents
# Collapse "stocks" into "stock". gsub() takes the text as its third
# parameter, so handing it to tm_map() directly only works through
# named-argument matching; an explicit function wrapped in
# content_transformer() passes the text deliberately and matches how the
# removeURL step is applied.
# The `##` lines below are output recorded from a run that passed `gsub`
# directly.
cleanset <- tm_map(
  cleanset,
  content_transformer(function(x) gsub("stocks", "stock", x))
)
## Warning in tm_map.SimpleCorpus(cleanset, gsub, pattern = "stocks", replacement =
## "stock"): transformation drops documents
# Reduce every word to its stem (e.g. "revenues" -> "revenu" in the recorded
# preview).
cleanset <- tm_map(cleanset, stemDocument)
## Warning in tm_map.SimpleCorpus(cleanset, stemDocument): transformation drops
## documents
# Squeeze the runs of blanks left behind by the removals above.
cleanset <- tm_map(cleanset, stripWhitespace)
## Warning in tm_map.SimpleCorpus(cleanset, stripWhitespace): transformation drops
## documents
# Preview the first five fully cleaned documents.
inspect(cleanset[1:5])
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 5
##
## [1] rt optionsnipp beat ep revenu see q rev bb est b
## [2] rt optionsnipp beat ep revenu see q rev bb est b
## [3] let see break timer
## [4] rt sylvacap thing might get ugli iphon delay mean almost fang stock posã¢â‚¬â¦
## [5] wow suppos throwaway quarter beat million revenu trillion dollar compani
# Count term occurrences per document and convert the result to an ordinary
# matrix: one row per term, one column per document.
tdm <- as.matrix(TermDocumentMatrix(cleanset))
# Print the counts of the first 10 terms across the first 20 documents.
tdm[seq_len(10), seq_len(20)]
## Docs
## Terms 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## beat 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## est 1 1 0 0 0 2 2 0 0 0 0 0 0 2 0 0 2 2 0 0
## optionsnipp 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## rev 1 1 0 0 0 1 1 0 0 1 0 0 1 0 0 0 1 1 0 0
## revenu 1 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## see 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## break 0 0 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0
## let 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## timer 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## almost 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
# Total count of each term over all documents.
w <- rowSums(tdm)
# Keep only the terms that occur at least 25 times in total.
w <- w[w >= 25]
# One bar per remaining term; las = 2 turns the axis labels perpendicular to
# the axis so the term names stay readable.
barplot(height = w, col = rainbow(50), las = 2)

library(wordcloud)
## Warning: package 'wordcloud' was built under R version 4.0.5
## Loading required package: RColorBrewer
# Total count of each term over all documents, most frequent first.
w <- sort(rowSums(tdm), decreasing = TRUE)
# Fix the RNG state before drawing so the cloud layout can be reproduced.
set.seed(222)
# Draw at most 150 terms, most frequent placed first (random.order = FALSE),
# with half of the words rotated (rot.per = 0.5).
# NOTE(review): the recorded run could not fit a number of words on the page;
# a smaller `scale` or a larger plotting device may avoid that -- not changed
# here because the right values depend on the device.
wordcloud(
  words = names(w),
  freq = w,
  max.words = 150,
  random.order = FALSE,
  min.freq = 1,
  colors = brewer.pal(8, "Dark2"),
  scale = c(5, 1),
  rot.per = 0.5
)
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : discuss could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : premium could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : past could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : dollar could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : benzinga could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : long could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : thestreet could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : goog could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : green could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : estim could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : jimcram could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : amc could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : cnbcclosingbel could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : need could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : back could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : robinhoodapp could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : nvda could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : posit could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : analysi could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : may could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : peterlusk could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : skew could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : price could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(w), freq = w, max.words = 150, random.order =
## F, : last could not be fit on page. It will not be plotted.

library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 4.0.5
# wordcloud2() takes a data frame, so turn the named frequency vector `w`
# into one with a `word` column and a `freq` column.
w <- data.frame(word = names(w), freq = w)
# Render the interactive cloud: triangular outline, half of the words rotated.
wordcloud2(
  data = w,
  size = 0.7,
  minSize = 1,
  rotateRatio = 0.5,
  shape = "triangle"
)