======================================================================================
## -- Attaching packages ---------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.7
## v tidyr 0.8.2 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks rtweet::flatten()
## x dplyr::lag() masks stats::lag()
## <Token>
## <oauth_endpoint>
## request: https://api.twitter.com/oauth/request_token
## authorize: https://api.twitter.com/oauth/authenticate
## access: https://api.twitter.com/oauth/access_token
## <oauth_app> my_twitter_research_app
## key: sxxdmMv0ceEXTFN0ZlqsdTcdu
## secret: <hidden>
## <credentials> oauth_token, oauth_token_secret
## ---
# Retrieve tweets
# Request up to 8000 statuses matching the #Telkomsel hashtag.
tweets <- search_tweets(
  q = "#Telkomsel",
  n = 8000,
  tweet_mode = "extended"
)
## Searching for tweets...
## Finished collecting tweets!
# De-duplicate on the tweet text (all other columns are kept), then
# inspect the last 20 rows of what remains.
tweets <- tweets %>% distinct(text, .keep_all = TRUE)
tail(tweets, n = 20)
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(removeURL)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(myCorpus,
## content_transformer(removeNumPunct)): transformation drops documents
## Warning in tm_map.SimpleCorpus(myCorpus, removeWords, myStopwords):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(myCorpus, stripWhitespace): transformation
## drops documents
# Term-document matrix for the cleaned corpus; wordLengths = c(1, Inf)
# admits terms from one character upwards.
tdm1 <- TermDocumentMatrix(
  myCorpus,
  control = list(wordLengths = c(1, Inf))
)
print(tdm1)
## <<TermDocumentMatrix (terms: 658, documents: 167)>>
## Non-/sparse entries: 3314/106572
## Sparsity : 97%
## Maximal term length: 39
## Weighting : term frequency (tf)
# Terms that occur at least 20 times in the term-document matrix.
freq.terms <- findFreqTerms(tdm1, lowfreq = 20)
# Show at most the first 50 of them. head() stops at the end of the vector,
# whereas indexing with [1:50] pads the result with NA when fewer than 50
# terms qualify.
head(freq.terms, 50)
## [1] "airasia" "blibli" "bpjs" "citilink"
## [5] "garuda" "hemat" "infojakarta" "jakpost"
## [9] "kompastv" "kppu" "lazada" "lionair"
## [13] "metrotv" "namair" "okezone" "pegipegi"
## [17] "selebrita" "shopee" "sriwijayaair" "telkomsel"
## [21] "trans" "transtv" "traveloka" "tribunnews"
## [25] "tvone" "antv" "grab" "harga"
## [29] "indosat" "jujur" "beritasatu" "halobca"
## [33] "konsumen"
# Total count of each term across all documents.
term.freq <- rowSums(as.matrix(tdm1))
# Keep only the terms counted at least 150 times.
# NOTE(review): the matrix summary above reports 167 documents, so a cut-off
# of 150 may leave little or nothing to plot -- confirm the intended threshold.
term.freq <- term.freq[term.freq >= 150]
df <- data.frame(term = names(term.freq), freq = term.freq)
# Horizontal bar chart of the retained terms.
ggplot(df, aes(x = term, y = freq)) +
  geom_col() +
  labs(x = "Terms", y = "Count") +
  coord_flip() +
  theme(axis.text = element_text(size = 7))
## Loading required package: RColorBrewer
# Draw the word cloud from the term counts in word.freq, coloured with pal1.
# random.order = FALSE plots terms in decreasing order of frequency; FALSE is
# spelled out because F is an ordinary variable that can be reassigned.
wordcloud(
  words = names(word.freq), freq = word.freq, min.freq = 100,
  random.order = FALSE, colors = pal1
)
## <Token>
## <oauth_endpoint>
## request: https://api.twitter.com/oauth/request_token
## authorize: https://api.twitter.com/oauth/authenticate
## access: https://api.twitter.com/oauth/access_token
## <oauth_app> my_twitter_research_app
## key: sxxdmMv0ceEXTFN0ZlqsdTcdu
## secret: <hidden>
## <credentials> oauth_token, oauth_token_secret
## ---
# Retrieve tweets
# Request up to 8000 statuses matching the #Indosat hashtag.
tweets <- search_tweets(
  q = "#Indosat",
  n = 8000,
  tweet_mode = "extended"
)
## Searching for tweets...
## Finished collecting tweets!
# Keep one row per distinct tweet text, retaining all other columns.
tweets <- distinct(tweets, text, .keep_all = TRUE)
# Plot tweet counts over time, aggregated into three-hour bins.
# The title names the hashtag this section actually searches for (#Indosat);
# "3 hours" is the bin width passed to ts_plot(), not the time span of the data,
# so the title no longer claims a three-hour window.
ts_plot(tweets, "3 hours") +
  theme_minimal() +
  theme(plot.title = ggplot2::element_text(face = "bold")) +
  labs(
    x = NULL, y = NULL,
    title = "Frequency of #Indosat Twitter statuses",
    subtitle = "Twitter status (tweet) counts aggregated using three-hour intervals",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )
# Inspect the last 20 rows of the de-duplicated data.
tail(tweets, 20)
library(tm)
# build a corpus, and specify the source to be character vectors
# (the text column of the de-duplicated tweets is the input)
myCorpus <- Corpus(VectorSource(tweets$text))
# convert to lower case
# content_transformer() wraps tolower so that tm_map() can apply it to the corpus.
# NOTE(review): the recorded run warns "transformation drops documents" at this
# and the following tm_map() steps -- confirm that no documents are actually lost.
myCorpus <- tm_map(myCorpus, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(tolower)):
## transformation drops documents
# remove URLs
# Deletes every run of characters that starts with "http" and extends up to the
# next whitespace character (or the end of the string). Vectorised over x.
removeURL <- function(x) {
  gsub(pattern = "http[^[:space:]]*", replacement = "", x = x)
}
# Apply removeURL to the corpus so links do not end up as terms.
myCorpus <- tm_map(myCorpus, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(removeURL)):
## transformation drops documents
# remove anything other than letters or whitespace
# ([:alpha:] is locale-dependent, so it is not limited to English letters)
# Digits, punctuation and other symbols are deleted. Vectorised over x.
removeNumPunct <- function(x) {
  gsub(pattern = "[^[:alpha:][:space:]]*", replacement = "", x = x)
}
# Apply removeNumPunct to the corpus, leaving only letters and whitespace.
myCorpus <- tm_map(myCorpus, content_transformer(removeNumPunct))
## Warning in tm_map.SimpleCorpus(myCorpus,
## content_transformer(removeNumPunct)): transformation drops documents
# remove stopwords
# Start from tm's English list without "r" and "big", then add a few extra
# tokens to drop.
# NOTE(review): "indihome" is listed although this section searches #Indosat --
# confirm it is intended here.
myStopwords <- c(
  setdiff(stopwords("english"), c("r", "big")),
  "use", "see", "used", "via", "amp", "indihome"
)
# Extra stopwords are read from the first column (V1) of a local file.
stopwords_id <- read.table("E://stopwords-id.txt", header = FALSE)
myStopwords <- c(myStopwords, as.character(stopwords_id[["V1"]]), "hi", "yg")
# Drop every collected stopword from the corpus.
myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
## Warning in tm_map.SimpleCorpus(myCorpus, removeWords, myStopwords):
## transformation drops documents
# remove extra whitespace
# (the removals above leave gaps behind; stripWhitespace is applied to tidy them up)
myCorpus <- tm_map(myCorpus, stripWhitespace)
## Warning in tm_map.SimpleCorpus(myCorpus, stripWhitespace): transformation
## drops documents
# keep a copy for stem completion later
myCorpusCopy <- myCorpus
# Term-document matrix for the cleaned corpus; wordLengths = c(1, Inf)
# admits terms from one character upwards.
tdm <- TermDocumentMatrix(
  myCorpus,
  control = list(wordLengths = c(1, Inf))
)
print(tdm)
## <<TermDocumentMatrix (terms: 404, documents: 86)>>
## Non-/sparse entries: 1810/32934
## Sparsity : 95%
## Maximal term length: 20
## Weighting : term frequency (tf)
# Terms that occur at least 20 times in the term-document matrix.
freq.terms <- findFreqTerms(tdm, lowfreq = 20)
# Show at most the first 50 of them. head() stops at the end of the vector,
# whereas indexing with [1:50] pads the result with NA when fewer than 50
# terms qualify.
head(freq.terms, 50)
## [1] "airasia" "antv" "blibli" "citilink"
## [5] "garuda" "grab" "harga" "hemat"
## [9] "indosat" "infojakarta" "jakpost" "jujur"
## [13] "lionair" "metrotv" "namair" "okezone"
## [17] "pegipegi" "selebrita" "shopee" "sriwijayaair"
## [21] "telkomsel" "trans" "transtv" "traveloka"
## [25] "tvone" "halobca" "konsumen"
# Total count of each term across all documents.
term.freq <- rowSums(as.matrix(tdm))
# Keep only the terms counted at least 150 times.
# NOTE(review): the matrix summary above reports 86 documents, so a cut-off
# of 150 may leave little or nothing to plot -- confirm the intended threshold.
term.freq <- term.freq[term.freq >= 150]
df <- data.frame(term = names(term.freq), freq = term.freq)
# Horizontal bar chart of the retained terms.
ggplot(df, aes(x = term, y = freq)) +
  geom_col() +
  labs(x = "Terms", y = "Count") +
  coord_flip() +
  theme(axis.text = element_text(size = 7))
library(wordcloud)
# Dense copy of the term-document matrix (terms in rows, documents in columns).
m <- as.matrix(tdm)
# calculate the frequency of words and sort it by frequency
# (TRUE/FALSE are spelled out: T and F are ordinary variables that can be reassigned)
word.freq <- sort(rowSums(m), decreasing = TRUE)
# colors: the 9-step "BuGn" palette with its first four shades dropped
pal <- brewer.pal(9, "BuGn")[-(1:4)]
# random.order = FALSE plots terms in decreasing order of frequency.
# NOTE(review): the recorded warnings for this call name many rare-looking
# terms, which suggests the min.freq = 100 cut-off is not filtering anything
# for this data set -- confirm the intended threshold.
wordcloud(
  words = names(word.freq), freq = word.freq, min.freq = 100,
  random.order = FALSE, colors = pal
)
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : rhenald could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : robohin could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : sinyalindosat could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : towernya could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : operator could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : pulsailang could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : pulsanya could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : serasa could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : unlimited could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : butut could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : maneh could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : barusan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq
## = 100, : jaringantanpakonplen could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : kebelakang could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : mbps could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : stagnan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : tembus could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : hmmmm could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : komplen could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : paketan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : tampa could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : kena could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : ketahuan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : menghentikan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : sedot could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : bbrp could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : dibeli could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : internasional could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : kesel could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : akses could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : internetjuaraaaa could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : hujan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : lemottttttt could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : musim could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : player could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : bekasi could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : jaya could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : screenshot could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : speedtest could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : wisma could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : limit could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : malam could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : pulatp could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : benarbenar could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : indikasi could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : kamar could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : keseringan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : mandi could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : membawa could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : memegang could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : myim could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : perasaan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : sahabat could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : tahukah could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : terkena could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : jaringanya could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : ngaruh could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : error could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : jelasin could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : jelek could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : beratus could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : darah could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : dibikin could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : dinotif could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : hentikan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : indosatooredo could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : initpstcoiqbrhoypn could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : notif could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : pesan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : pop could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : smpe could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : bebas could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : hambatan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : jelang could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : koq could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : subuh could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : yabukannya could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : yasinyaljelek could not be fit on page. It will not be plotted.