======================================================================================

TELKOMSEL

Extracting Tweets

Retrieve tweets from Twitter

## -- Attaching packages ---------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.7
## v tidyr   0.8.2     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter()  masks stats::filter()
## x purrr::flatten() masks rtweet::flatten()
## x dplyr::lag()     masks stats::lag()
## <Token>
## <oauth_endpoint>
##  request:   https://api.twitter.com/oauth/request_token
##  authorize: https://api.twitter.com/oauth/authenticate
##  access:    https://api.twitter.com/oauth/access_token
## <oauth_app> my_twitter_research_app
##   key:    sxxdmMv0ceEXTFN0ZlqsdTcdu
##   secret: <hidden>
## <credentials> oauth_token, oauth_token_secret
## ---
# Search Twitter for statuses tagged #Telkomsel, requesting 8000 results
tweets <- search_tweets(
  "#Telkomsel",
  n = 8000,
  tweet_mode = "extended"
)
## Searching for tweets...
## Finished collecting tweets!
# Keep one row per distinct tweet text, retaining every other column
tweets <- distinct(.data = tweets, text, .keep_all = TRUE)

Tweets Description

# Show the last 20 rows of the de-duplicated tweets
tail(tweets, n = 20)

Text Cleaning

library(tm)
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate

Build corpus

## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(removeURL)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(myCorpus,
## content_transformer(removeNumPunct)): transformation drops documents
## Warning in tm_map.SimpleCorpus(myCorpus, removeWords, myStopwords):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(myCorpus, stripWhitespace): transformation
## drops documents

Frequent Words

Build Term Document Matrix

# Term-document matrix over the cleaned corpus; wordLengths = c(1, Inf)
# admits terms of any length from one character upwards
tdm1 <- TermDocumentMatrix(
  myCorpus,
  control = list(wordLengths = c(1, Inf))
)
# Print the matrix summary
tdm1
## <<TermDocumentMatrix (terms: 658, documents: 167)>>
## Non-/sparse entries: 3314/106572
## Sparsity           : 97%
## Maximal term length: 39
## Weighting          : term frequency (tf)

Top Frequent Terms

# Terms that occur at least 20 times across the corpus
freq.terms <- findFreqTerms(tdm1, lowfreq = 20)
# Show at most the first 50 terms. head() stops at the end of the vector,
# whereas freq.terms[1:50] indexes past it and pads the result with NA when
# fewer than 50 terms qualify (only 33 do here).
head(freq.terms, 50)
##  [1] "airasia"      "blibli"       "bpjs"         "citilink"    
##  [5] "garuda"       "hemat"        "infojakarta"  "jakpost"     
##  [9] "kompastv"     "kppu"         "lazada"       "lionair"     
## [13] "metrotv"      "namair"       "okezone"      "pegipegi"    
## [17] "selebrita"    "shopee"       "sriwijayaair" "telkomsel"   
## [21] "trans"        "transtv"      "traveloka"    "tribunnews"  
## [25] "tvone"        "antv"         "grab"         "harga"       
## [29] "indosat"      "jujur"        "beritasatu"   "halobca"     
## [33] "konsumen"
# Total occurrences of each term, summed over all documents
term.freq <- rowSums(as.matrix(tdm1))
# Keep only the terms seen at least 150 times
term.freq <- subset(term.freq, term.freq >= 150)
df <- data.frame(term = names(term.freq), freq = term.freq)
# Horizontal bar chart of the retained terms and their counts
ggplot(df, aes(x = term, y = freq)) +
  geom_col() +
  labs(x = "Terms", y = "Count") +
  coord_flip() +
  theme(axis.text = element_text(size = 7))

Wordcloud

Build Wordcloud

## Loading required package: RColorBrewer
# Word cloud of the terms that occur at least 100 times, coloured with pal1.
# random.order is spelled FALSE rather than F: F is an ordinary variable that
# can be reassigned, while FALSE is a reserved word.
# NOTE(review): word.freq and pal1 are not defined in the visible code;
# presumably they are created in a chunk hidden from the rendered output.
wordcloud(words = names(word.freq), freq = word.freq, min.freq = 100,
    random.order = FALSE, colors = pal1)

INDOSAT

Extracting Tweets

Retrieve tweets from Twitter

## <Token>
## <oauth_endpoint>
##  request:   https://api.twitter.com/oauth/request_token
##  authorize: https://api.twitter.com/oauth/authenticate
##  access:    https://api.twitter.com/oauth/access_token
## <oauth_app> my_twitter_research_app
##   key:    sxxdmMv0ceEXTFN0ZlqsdTcdu
##   secret: <hidden>
## <credentials> oauth_token, oauth_token_secret
## ---
# Search Twitter for statuses tagged #Indosat, requesting 8000 results
tweets <- search_tweets(
  "#Indosat",
  n = 8000,
  tweet_mode = "extended"
)
## Searching for tweets...
## Finished collecting tweets!
# Keep one row per distinct tweet text, retaining every other column
tweets <- distinct(.data = tweets, text, .keep_all = TRUE)

Tweets Description

# Time series of the #Indosat tweet counts, binned into three-hour intervals
ts_plot(tweets, "3 hours") +
  theme_minimal() +
  theme(plot.title = ggplot2::element_text(face = "bold")) +
  labs(
    x = NULL, y = NULL,
    # The title names the hashtag queried in this section. The earlier wording
    # referred to "indihome" and to the "past 3 hours", but "3 hours" is only
    # the bin width passed to ts_plot(), not the period covered by the data.
    title = "Frequency of #Indosat Twitter statuses",
    subtitle = "Twitter status (tweet) counts aggregated using three-hour intervals",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )

# Show the last 20 rows of the de-duplicated tweets
tail(tweets, n = 20)

Text Cleaning

library(tm)

Build corpus

# Text-cleaning pipeline for the tweet text: build a tm corpus, then
# lower-case it, strip URLs, drop non-letter characters, remove stopwords and
# strip extra whitespace. The "## Warning ..." lines are console output
# recorded when the script was run; they quote the calls above them verbatim.
# build a corpus, and specify the source to be character vectors
myCorpus <- Corpus(VectorSource(tweets$text))
# convert to lower case
myCorpus <- tm_map(myCorpus, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(tolower)):
## transformation drops documents
# remove URLs: delete "http" and everything after it up to the next whitespace
removeURL <- function(x) gsub("http[^[:space:]]*", "", x)
myCorpus <- tm_map(myCorpus, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(myCorpus, content_transformer(removeURL)):
## transformation drops documents
# remove every character that is neither alphabetic ([:alpha:]) nor
# whitespace, i.e. digits and punctuation such as "#" and "@"
removeNumPunct <- function(x) gsub("[^[:alpha:][:space:]]*", "", x) 
myCorpus <- tm_map(myCorpus, content_transformer(removeNumPunct))
## Warning in tm_map.SimpleCorpus(myCorpus,
## content_transformer(removeNumPunct)): transformation drops documents
# remove stopwords: tm's English list without "r" and "big", plus extra tokens
# NOTE(review): "indihome" looks carried over from a different analysis; this
# section queries #Indosat -- confirm which brand term, if any, should be dropped
myStopwords <- c(setdiff(stopwords('english'), c("r", "big")), "use", "see", "used", "via", "amp", "indihome")
# additional stopwords read from a local file; only its first column (V1) is used
# NOTE(review): absolute Windows path -- the script only runs on a machine
# where E://stopwords-id.txt exists
stopwords_id <- read.table("E://stopwords-id.txt", header = FALSE)
myStopwords <- c(myStopwords, as.matrix(stopwords_id$V1), "hi", "yg")
myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
## Warning in tm_map.SimpleCorpus(myCorpus, removeWords, myStopwords):
## transformation drops documents
# remove extra whitespace
myCorpus <- tm_map(myCorpus, stripWhitespace)
## Warning in tm_map.SimpleCorpus(myCorpus, stripWhitespace): transformation
## drops documents
# keep a copy for stem completion later
# NOTE(review): no stemming or stem-completion step appears in the visible
# code, so myCorpusCopy seems unused here -- confirm before removing
myCorpusCopy <- myCorpus

Frequent Words_2

Build Term Document Matrix_2

# Term-document matrix over the cleaned corpus; wordLengths = c(1, Inf)
# admits terms of any length from one character upwards
tdm <- TermDocumentMatrix(
  myCorpus,
  control = list(wordLengths = c(1, Inf))
)
# Print the matrix summary
tdm
## <<TermDocumentMatrix (terms: 404, documents: 86)>>
## Non-/sparse entries: 1810/32934
## Sparsity           : 95%
## Maximal term length: 20
## Weighting          : term frequency (tf)

Top Frequent Terms_2

# Terms that occur at least 20 times across the corpus
freq.terms <- findFreqTerms(tdm, lowfreq = 20)
# Show at most the first 50 terms. head() stops at the end of the vector,
# whereas freq.terms[1:50] indexes past it and pads the result with NA when
# fewer than 50 terms qualify (only 27 do here).
head(freq.terms, 50)
##  [1] "airasia"      "antv"         "blibli"       "citilink"    
##  [5] "garuda"       "grab"         "harga"        "hemat"       
##  [9] "indosat"      "infojakarta"  "jakpost"      "jujur"       
## [13] "lionair"      "metrotv"      "namair"       "okezone"     
## [17] "pegipegi"     "selebrita"    "shopee"       "sriwijayaair"
## [21] "telkomsel"    "trans"        "transtv"      "traveloka"   
## [25] "tvone"        "halobca"      "konsumen"
# Total occurrences of each term, summed over all documents
term.freq <- rowSums(as.matrix(tdm))
# Keep only the terms seen at least 150 times
term.freq <- subset(term.freq, term.freq >= 150)
df <- data.frame(term = names(term.freq), freq = term.freq)
# Horizontal bar chart of the retained terms and their counts
ggplot(df, aes(x = term, y = freq)) +
  geom_col() +
  labs(x = "Terms", y = "Count") +
  coord_flip() +
  theme(axis.text = element_text(size = 7))

Wordcloud_2

Build Wordcloud_2

library(wordcloud)
# Dense copy of the term-document matrix (rows are terms, columns documents)
m <- as.matrix(tdm)
# Per-term totals, sorted from most to least frequent.
# TRUE/FALSE are spelled out below: T and F are ordinary variables that can
# be reassigned, while TRUE and FALSE are reserved words.
word.freq <- sort(rowSums(m), decreasing = TRUE)
# colors: the 9-colour "BuGn" palette with its first four entries dropped
pal <- brewer.pal(9, "BuGn")[-(1:4)]
# Word cloud of the terms that occur at least 100 times
wordcloud(words = names(word.freq), freq = word.freq, min.freq = 100,
    random.order = FALSE, colors = pal)
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : rhenald could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : robohin could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : sinyalindosat could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : towernya could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : operator could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : pulsailang could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : pulsanya could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : serasa could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : unlimited could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : butut could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : maneh could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : barusan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq
## = 100, : jaringantanpakonplen could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : kebelakang could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : mbps could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : stagnan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : tembus could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : hmmmm could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : komplen could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : paketan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : tampa could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : kena could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : ketahuan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : menghentikan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : sedot could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : bbrp could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : dibeli could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : internasional could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : kesel could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : akses could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : internetjuaraaaa could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : hujan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : lemottttttt could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : musim could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : player could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : bekasi could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : jaya could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : screenshot could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : speedtest could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : wisma could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : limit could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : malam could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : pulatp could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : benarbenar could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : indikasi could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : kamar could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : keseringan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : mandi could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : membawa could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : memegang could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : myim could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : perasaan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : sahabat could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : tahukah could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : terkena could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : jaringanya could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : ngaruh could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : error could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : jelasin could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : jelek could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : beratus could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : darah could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : dibikin could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : dinotif could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : hentikan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : indosatooredo could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : initpstcoiqbrhoypn could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : notif could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : pesan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : pop could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : smpe could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : bebas could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : hambatan could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : jelang could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : koq could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : subuh could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : yabukannya could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = names(word.freq), freq = word.freq, min.freq =
## 100, : yasinyaljelek could not be fit on page. It will not be plotted.